From 82e9c6b9f424edd8b3e1f714cbe28347e7d7a470 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com>
Date: Tue, 2 Jul 2024 20:10:29 +0200
Subject: [PATCH] implement cache

---
 Cargo.lock     |   8 ++++
 Cargo.toml     |   2 +
 src/cache.rs   |  60 ++++++++++++++++++++++++++++
 src/canteen.rs |   2 +-
 src/dish.rs    |   8 ++--
 src/lib.rs     |  27 +++++++++++++
 src/main.rs    |  19 +++++----
 src/menu.rs    | 103 +++++++++++++++++++++++++------------------------
 8 files changed, 165 insertions(+), 64 deletions(-)
 create mode 100644 src/cache.rs

diff --git a/Cargo.lock b/Cargo.lock
index 6425539..f66cbf0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -264,6 +264,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "anyhow"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -1191,9 +1197,11 @@ version = "0.1.0"
 dependencies = [
  "actix-governor",
  "actix-web",
+ "anyhow",
  "chrono",
  "const_format",
  "dotenvy",
+ "futures",
  "itertools",
  "reqwest",
  "scraper",
diff --git a/Cargo.toml b/Cargo.toml
index 9509f87..4999cc8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,9 +7,11 @@ edition = "2021"
 [dependencies]
 actix-governor = { version = "0.5.0", features = ["log"] }
 actix-web = "4.8.0"
+anyhow = "1.0.86"
 chrono = "0.4.38"
 const_format = "0.2.32"
 dotenvy = "0.15.7"
+futures = "0.3.30"
 itertools = "0.13.0"
 reqwest = "0.12.5"
 scraper = "0.19.0"
diff --git a/src/cache.rs b/src/cache.rs
new file mode 100644
index 0000000..4af910d
--- /dev/null
+++ b/src/cache.rs
@@ -0,0 +1,60 @@
+use std::{collections::HashMap, sync::Arc};
+
+use chrono::{NaiveDate, Utc};
+use futures::StreamExt;
+use itertools::Itertools;
+use tokio::sync::RwLock;
+
+use crate::{Canteen, Menu};
+
+#[derive(Debug, Clone, Default)]
+pub struct MenuCache {
+    cache: Arc<RwLock<HashMap<(NaiveDate, Canteen), Menu>>>,
+}
+
+impl MenuCache {
+    pub async fn get_combined(&self, canteens: &[Canteen], date: NaiveDate) -> Menu {
+        futures::stream::iter(canteens)
+            .then(|canteen| async move { self.get(*canteen, date).await })
+            .filter_map(|c| async { c })
+            .fold(Menu::default(), |a, b| async move { a.merged(b) })
+            .await
+    }
+
+    pub async fn get(&self, canteen: Canteen, date: NaiveDate) -> Option<Menu> {
+        let query = (date, canteen);
+        let (is_in_cache, is_cache_too_large) = {
+            let cache = self.cache.read().await;
+            (cache.contains_key(&query), cache.len() > 100)
+        };
+        if is_cache_too_large {
+            self.clean_outdated().await;
+        }
+        if is_in_cache {
+            let cache = self.cache.read().await;
+            Some(cache.get(&query)?.clone())
+        } else {
+            let menu = Menu::new(date, canteen).await.ok()?;
+
+            self.cache.write().await.insert(query, menu.clone());
+
+            Some(menu)
+        }
+    }
+
+    pub async fn clean_outdated(&self) {
+        let today = Utc::now().date_naive();
+        let outdated_keys = self
+            .cache
+            .read()
+            .await
+            .keys()
+            .map(|x| x.to_owned())
+            .filter(|(date, _)| date < &today)
+            .collect_vec();
+        let mut cache = self.cache.write().await;
+        for key in outdated_keys {
+            cache.remove(&key);
+        }
+    }
+}
diff --git a/src/canteen.rs b/src/canteen.rs
index 5dd0c63..15b2900 100644
--- a/src/canteen.rs
+++ b/src/canteen.rs
@@ -3,7 +3,7 @@ use std::str::FromStr;
 use const_format::concatcp;
 use strum::EnumIter;
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)]
 pub enum Canteen {
     Forum,
     Academica,
diff --git a/src/dish.rs b/src/dish.rs
index ebdae74..d0f2f40 100644
--- a/src/dish.rs
+++ b/src/dish.rs
@@ -54,7 +54,7 @@ impl Dish {
 
 impl Dish {
     pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> {
-        let html_name_selector = scraper::Selector::parse(".desc h4").unwrap();
+        let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
         let name = element
             .select(&html_name_selector)
             .next()?
@@ -64,11 +64,11 @@ impl Dish {
             .trim()
             .to_string();
 
-        let img_selector = scraper::Selector::parse(".img img").unwrap();
+        let img_selector = scraper::Selector::parse(".img img").ok()?;
         let img_src_path = element.select(&img_selector).next()?.value().attr("src")?;
         let img_src = format!("https://www.studierendenwerk-pb.de/{}", img_src_path);
 
-        let html_price_selector = scraper::Selector::parse(".desc .price").unwrap();
+        let html_price_selector = scraper::Selector::parse(".desc .price").ok()?;
         let mut prices = element
             .select(&html_price_selector)
             .filter_map(|price| {
@@ -91,7 +91,7 @@ impl Dish {
             })
             .collect::<Vec<_>>();
 
-        let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").unwrap();
+        let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?;
         let extras = element
             .select(&html_extras_selector)
             .filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
diff --git a/src/lib.rs b/src/lib.rs
index ba6b365..683bb32 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,7 +1,34 @@
+mod cache;
 mod canteen;
 mod dish;
 mod menu;
 
+use std::{error::Error, fmt::Display};
+
+pub use cache::MenuCache;
 pub use canteen::Canteen;
 pub use dish::Dish;
 pub use menu::Menu;
+
+#[derive(Debug, Clone)]
+struct CustomError(String);
+
+impl Display for CustomError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
+
+impl Error for CustomError {}
+
+impl From<&str> for CustomError {
+    fn from(s: &str) -> Self {
+        CustomError(s.to_string())
+    }
+}
+
+impl From<String> for CustomError {
+    fn from(s: String) -> Self {
+        CustomError(s)
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 376218e..2dd674e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,7 +4,7 @@ use actix_governor::{Governor, GovernorConfigBuilder};
 use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
 use chrono::{Duration as CDuration, Utc};
 use itertools::Itertools;
-use mensa_upb_api::{Canteen, Menu};
+use mensa_upb_api::{Canteen, MenuCache};
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 use strum::IntoEnumIterator;
@@ -35,9 +35,12 @@ async fn main() -> io::Result<()> {
         .finish()
         .unwrap();
 
+    let menu_cache = MenuCache::default();
+
     HttpServer::new(move || {
         App::new()
             .wrap(Governor::new(&governor_conf))
+            .app_data(web::Data::new(menu_cache.clone()))
             .service(index)
             .service(menu_today)
     })
@@ -62,7 +65,11 @@ struct MenuQuery {
 }
 
 #[get("/menu/{canteen}")]
-async fn menu_today(path: web::Path<String>, query: web::Query<MenuQuery>) -> impl Responder {
+async fn menu_today(
+    cache: web::Data<MenuCache>,
+    path: web::Path<String>,
+    query: web::Query<MenuQuery>,
+) -> impl Responder {
     let canteens = path
         .into_inner()
         .split(',')
@@ -71,13 +78,9 @@ async fn menu_today(
     if canteens.iter().all(Result::is_ok) {
         let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();
         let days_ahead = query.days_ahead.unwrap_or(0);
+        let date = (Utc::now() + CDuration::days(days_ahead as i64)).date_naive();
 
-        let menu = Menu::new(
-            (Utc::now() + CDuration::days(days_ahead as i64)).date_naive(),
-            &canteens,
-        )
-        .await
-        .unwrap();
+        let menu = cache.get_combined(&canteens, date).await;
 
         HttpResponse::Ok().json(menu)
     } else {
diff --git a/src/menu.rs b/src/menu.rs
index b8d70b0..6373f9f 100644
--- a/src/menu.rs
+++ b/src/menu.rs
@@ -1,9 +1,10 @@
+use anyhow::Result;
 use chrono::NaiveDate;
 use serde::{Deserialize, Serialize};
 
-use crate::{Canteen, Dish};
+use crate::{Canteen, CustomError, Dish};
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
 pub struct Menu {
     main_dishes: Vec<Dish>,
     side_dishes: Vec<Dish>,
@@ -11,47 +12,8 @@ pub struct Menu {
 }
 
 impl Menu {
-    pub async fn new(day: NaiveDate, canteens: &[Canteen]) -> Result<Self, reqwest::Error> {
-        let mut main_dishes = Vec::new();
-        let mut side_dishes = Vec::new();
-        let mut desserts = Vec::new();
-
-        for canteen in canteens.iter().copied() {
-            let (main, side, des) = scrape_menu(canteen, day).await?;
-            for dish in main {
-                if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
-                    existing.merge(dish);
-                } else {
-                    main_dishes.push(dish);
-                }
-            }
-            for dish in side {
-                if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
-                    existing.merge(dish);
-                } else {
-                    side_dishes.push(dish);
-                }
-            }
-            for dish in des {
-                if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
-                    existing.merge(dish);
-                } else {
-                    desserts.push(dish);
-                }
-            }
-        }
-
-        let compare_name = |a: &Dish, b: &Dish| a.get_name().cmp(b.get_name());
-
-        main_dishes.sort_by(compare_name);
-        side_dishes.sort_by(compare_name);
-        desserts.sort_by(compare_name);
-
-        Ok(Self {
-            main_dishes,
-            side_dishes,
-            desserts,
-        })
+    pub async fn new(day: NaiveDate, canteen: Canteen) -> Result<Self> {
+        scrape_menu(canteen, day).await
     }
 
     pub fn get_main_dishes(&self) -> &[Dish] {
@@ -65,12 +27,47 @@ impl Menu {
     pub fn get_desserts(&self) -> &[Dish] {
         &self.desserts
     }
+
+    pub fn merged(self, other: Self) -> Self {
+        let mut main_dishes = self.main_dishes;
+        let mut side_dishes = self.side_dishes;
+        let mut desserts = self.desserts;
+
+        for dish in other.main_dishes {
+            if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
+                existing.merge(dish);
+            } else {
+                main_dishes.push(dish);
+            }
+        }
+        for dish in other.side_dishes {
+            if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
+                existing.merge(dish);
+            } else {
+                side_dishes.push(dish);
+            }
+        }
+        for dish in other.desserts {
+            if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
+                existing.merge(dish);
+            } else {
+                desserts.push(dish);
+            }
+        }
+
+        main_dishes.sort_by(|a, b| a.get_name().cmp(b.get_name()));
+        side_dishes.sort_by(|a, b| a.get_name().cmp(b.get_name()));
+        desserts.sort_by(|a, b| a.get_name().cmp(b.get_name()));
+
+        Self {
+            main_dishes,
+            side_dishes,
+            desserts,
+        }
+    }
 }
 
-async fn scrape_menu(
-    canteen: Canteen,
-    day: NaiveDate,
-) -> Result<(Vec<Dish>, Vec<Dish>, Vec<Dish>), reqwest::Error> {
+async fn scrape_menu(canteen: Canteen, day: NaiveDate) -> Result<Menu> {
     let url = canteen.get_url();
     let client = reqwest::Client::new();
     let request_builder = client
@@ -84,7 +81,7 @@ async fn scrape_menu(
     let html_main_dishes_selector = scraper::Selector::parse(
         "table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
     )
-    .unwrap();
+    .map_err(|_| CustomError::from("Failed to parse selector"))?;
     let html_main_dishes = document.select(&html_main_dishes_selector);
     let main_dishes = html_main_dishes
         .filter_map(|dish| Dish::from_element(dish, canteen))
@@ -93,7 +90,7 @@ async fn scrape_menu(
     let html_side_dishes_selector = scraper::Selector::parse(
         "table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
     )
-    .unwrap();
+    .map_err(|_| CustomError::from("Failed to parse selector"))?;
     let html_side_dishes = document.select(&html_side_dishes_selector);
     let side_dishes = html_side_dishes
         .filter_map(|dish| Dish::from_element(dish, canteen))
@@ -102,11 +99,15 @@ async fn scrape_menu(
     let html_desserts_selector = scraper::Selector::parse(
         "table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
     )
-    .unwrap();
+    .map_err(|_| CustomError::from("Failed to parse selector"))?;
    let html_desserts = document.select(&html_desserts_selector);
    let desserts = html_desserts
        .filter_map(|dish| Dish::from_element(dish, canteen))
        .collect::<Vec<Dish>>();
 
-    Ok((main_dishes, side_dishes, desserts))
+    Ok(Menu {
+        main_dishes,
+        side_dishes,
+        desserts,
+    })
 }
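
Notes (untested reviewer sketches, not part of the patch):

Usage: a minimal sketch of driving the new cache outside the Actix handler. It assumes a bin target with tokio's "macros" and "rt-multi-thread" features enabled, and uses only the API added above (MenuCache::default, get_combined) plus existing Canteen variants:

    use chrono::Utc;
    use mensa_upb_api::{Canteen, MenuCache};

    #[tokio::main]
    async fn main() {
        let cache = MenuCache::default();
        let today = Utc::now().date_naive();

        // The first call scrapes both canteens and fills the cache;
        // repeated calls for the same (date, canteen) pairs are
        // answered from memory.
        let menu = cache
            .get_combined(&[Canteen::Forum, Canteen::Academica], today)
            .await;
        println!("{} main dishes", menu.get_main_dishes().len());
    }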
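For readers new to futures::stream, the then/filter_map/fold chain in get_combined awaits each canteen one at a time (StreamExt::then resolves the futures sequentially) and is equivalent to this imperative form:

    // Sketch of the same logic as a plain loop inside MenuCache.
    let mut combined = Menu::default();
    for canteen in canteens {
        // get() returns None when scraping fails; such canteens are skipped.
        if let Some(menu) = self.get(*canteen, date).await {
            combined = combined.merged(menu);
        }
    }
    combined

Something like buffer_unordered(n) would fetch canteens concurrently instead; the sequential version keeps at most one scrape in flight per request.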
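On the error plumbing: scraper's Selector::parse error type borrows the selector string, which is presumably why the three identical map_err sites in scrape_menu convert to the owned, crate-internal CustomError before bubbling through anyhow::Result. A hypothetical helper (illustrative only, not part of this patch) could deduplicate them:

    use anyhow::Result;

    // Hypothetical helper; name and placement are illustrative only.
    fn parse_selector(selector: &str) -> Result<scraper::Selector> {
        scraper::Selector::parse(selector)
            .map_err(|_| CustomError::from(format!("Failed to parse selector: {selector}")).into())
    }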