implement cache

This commit is contained in:
Moritz Hölting 2024-07-02 20:10:29 +02:00
parent 239a33c1e5
commit 82e9c6b9f4
8 changed files with 165 additions and 64 deletions

8
Cargo.lock generated
View File

@ -264,6 +264,12 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "anyhow"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da"
[[package]] [[package]]
name = "atomic-waker" name = "atomic-waker"
version = "1.1.2" version = "1.1.2"
@ -1191,9 +1197,11 @@ version = "0.1.0"
dependencies = [ dependencies = [
"actix-governor", "actix-governor",
"actix-web", "actix-web",
"anyhow",
"chrono", "chrono",
"const_format", "const_format",
"dotenvy", "dotenvy",
"futures",
"itertools", "itertools",
"reqwest", "reqwest",
"scraper", "scraper",

View File

@ -7,9 +7,11 @@ edition = "2021"
[dependencies] [dependencies]
actix-governor = { version = "0.5.0", features = ["log"] } actix-governor = { version = "0.5.0", features = ["log"] }
actix-web = "4.8.0" actix-web = "4.8.0"
anyhow = "1.0.86"
chrono = "0.4.38" chrono = "0.4.38"
const_format = "0.2.32" const_format = "0.2.32"
dotenvy = "0.15.7" dotenvy = "0.15.7"
futures = "0.3.30"
itertools = "0.13.0" itertools = "0.13.0"
reqwest = "0.12.5" reqwest = "0.12.5"
scraper = "0.19.0" scraper = "0.19.0"

60
src/cache.rs Normal file
View File

@ -0,0 +1,60 @@
use std::{collections::HashMap, sync::Arc};
use chrono::{NaiveDate, Utc};
use futures::StreamExt;
use itertools::Itertools;
use tokio::sync::RwLock;
use crate::{Canteen, Menu};
/// Shared, thread-safe in-memory cache of scraped menus, keyed by `(date, canteen)`.
///
/// Cloning is cheap: `Clone` duplicates the `Arc`, so every clone shares the
/// same underlying map (handy for `actix_web::web::Data`).
#[derive(Debug, Clone, Default)]
pub struct MenuCache {
    // Async `RwLock` so concurrent readers don't block each other.
    cache: Arc<RwLock<HashMap<(NaiveDate, Canteen), Menu>>>,
}
impl MenuCache {
    /// Soft limit on cached entries; exceeding it triggers an eviction pass.
    const MAX_ENTRIES: usize = 100;

    /// Fetches and merges the menus of all given canteens for `date`.
    ///
    /// Canteens whose menu cannot be retrieved are silently skipped; with no
    /// successful fetch the result is `Menu::default()`.
    pub async fn get_combined(&self, canteens: &[Canteen], date: NaiveDate) -> Menu {
        futures::stream::iter(canteens)
            .then(|canteen| async move { self.get(*canteen, date).await })
            .filter_map(|c| async { c })
            .fold(Menu::default(), |a, b| async move { a.merged(b) })
            .await
    }

    /// Returns the menu for `canteen` on `date`, scraping and caching it on a
    /// miss. Returns `None` if scraping fails.
    pub async fn get(&self, canteen: Canteen, date: NaiveDate) -> Option<Menu> {
        let query = (date, canteen);

        // Evict outdated entries *before* the lookup. The previous ordering
        // checked containment first; `clean_outdated` could then remove the
        // queried key, and the later read returned a spurious `None` for a
        // menu that was still retrievable.
        let is_cache_too_large = self.cache.read().await.len() > Self::MAX_ENTRIES;
        if is_cache_too_large {
            self.clean_outdated().await;
        }

        // Single read-lock acquisition; clone the hit so the lock is released
        // before any await on the slow path.
        let cached = self.cache.read().await.get(&query).cloned();
        match cached {
            Some(menu) => Some(menu),
            None => {
                let menu = Menu::new(date, canteen).await.ok()?;
                self.cache.write().await.insert(query, menu.clone());
                Some(menu)
            }
        }
    }

    /// Removes all entries whose date lies strictly before today (UTC).
    pub async fn clean_outdated(&self) {
        let today = Utc::now().date_naive();
        // Collect keys under the read lock first so the write lock is held
        // only for the removals. Key tuples are `Copy`, so no `to_owned`.
        let outdated_keys = self
            .cache
            .read()
            .await
            .keys()
            .filter(|(date, _)| date < &today)
            .copied()
            .collect_vec();
        let mut cache = self.cache.write().await;
        for key in outdated_keys {
            cache.remove(&key);
        }
    }
}

View File

@ -3,7 +3,7 @@ use std::str::FromStr;
use const_format::concatcp; use const_format::concatcp;
use strum::EnumIter; use strum::EnumIter;
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter)] #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)]
pub enum Canteen { pub enum Canteen {
Forum, Forum,
Academica, Academica,

View File

@ -54,7 +54,7 @@ impl Dish {
impl Dish { impl Dish {
pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> { pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> {
let html_name_selector = scraper::Selector::parse(".desc h4").unwrap(); let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
let name = element let name = element
.select(&html_name_selector) .select(&html_name_selector)
.next()? .next()?
@ -64,11 +64,11 @@ impl Dish {
.trim() .trim()
.to_string(); .to_string();
let img_selector = scraper::Selector::parse(".img img").unwrap(); let img_selector = scraper::Selector::parse(".img img").ok()?;
let img_src_path = element.select(&img_selector).next()?.value().attr("src")?; let img_src_path = element.select(&img_selector).next()?.value().attr("src")?;
let img_src = format!("https://www.studierendenwerk-pb.de/{}", img_src_path); let img_src = format!("https://www.studierendenwerk-pb.de/{}", img_src_path);
let html_price_selector = scraper::Selector::parse(".desc .price").unwrap(); let html_price_selector = scraper::Selector::parse(".desc .price").ok()?;
let mut prices = element let mut prices = element
.select(&html_price_selector) .select(&html_price_selector)
.filter_map(|price| { .filter_map(|price| {
@ -91,7 +91,7 @@ impl Dish {
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").unwrap(); let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?;
let extras = element let extras = element
.select(&html_extras_selector) .select(&html_extras_selector)
.filter_map(|extra| extra.value().attr("title").map(|title| title.to_string())) .filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))

View File

@ -1,7 +1,34 @@
mod cache;
mod canteen; mod canteen;
mod dish; mod dish;
mod menu; mod menu;
use std::{error::Error, fmt::Display};
pub use cache::MenuCache;
pub use canteen::Canteen; pub use canteen::Canteen;
pub use dish::Dish; pub use dish::Dish;
pub use menu::Menu; pub use menu::Menu;
/// Lightweight crate-internal error type carrying only a message string.
#[derive(Debug, Clone)]
struct CustomError(String);

impl Display for CustomError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Forward the wrapped message verbatim.
        f.write_str(&self.0)
    }
}

// Marker impl: `Display` + `Debug` above satisfy the `Error` contract.
impl Error for CustomError {}

impl From<&str> for CustomError {
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}

impl From<String> for CustomError {
    fn from(s: String) -> Self {
        Self(s)
    }
}

View File

@ -4,7 +4,7 @@ use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder}; use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
use chrono::{Duration as CDuration, Utc}; use chrono::{Duration as CDuration, Utc};
use itertools::Itertools; use itertools::Itertools;
use mensa_upb_api::{Canteen, Menu}; use mensa_upb_api::{Canteen, MenuCache};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use strum::IntoEnumIterator; use strum::IntoEnumIterator;
@ -35,9 +35,12 @@ async fn main() -> io::Result<()> {
.finish() .finish()
.unwrap(); .unwrap();
let menu_cache = MenuCache::default();
HttpServer::new(move || { HttpServer::new(move || {
App::new() App::new()
.wrap(Governor::new(&governor_conf)) .wrap(Governor::new(&governor_conf))
.app_data(web::Data::new(menu_cache.clone()))
.service(index) .service(index)
.service(menu_today) .service(menu_today)
}) })
@ -62,7 +65,11 @@ struct MenuQuery {
} }
#[get("/menu/{canteen}")] #[get("/menu/{canteen}")]
async fn menu_today(path: web::Path<String>, query: web::Query<MenuQuery>) -> impl Responder { async fn menu_today(
cache: web::Data<MenuCache>,
path: web::Path<String>,
query: web::Query<MenuQuery>,
) -> impl Responder {
let canteens = path let canteens = path
.into_inner() .into_inner()
.split(',') .split(',')
@ -71,13 +78,9 @@ async fn menu_today(path: web::Path<String>, query: web::Query<MenuQuery>) -> im
if canteens.iter().all(Result::is_ok) { if canteens.iter().all(Result::is_ok) {
let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec(); let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();
let days_ahead = query.days_ahead.unwrap_or(0); let days_ahead = query.days_ahead.unwrap_or(0);
let date = (Utc::now() + CDuration::days(days_ahead as i64)).date_naive();
let menu = Menu::new( let menu = cache.get_combined(&canteens, date).await;
(Utc::now() + CDuration::days(days_ahead as i64)).date_naive(),
&canteens,
)
.await
.unwrap();
HttpResponse::Ok().json(menu) HttpResponse::Ok().json(menu)
} else { } else {

View File

@ -1,9 +1,10 @@
use anyhow::Result;
use chrono::NaiveDate; use chrono::NaiveDate;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{Canteen, Dish}; use crate::{Canteen, CustomError, Dish};
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Menu { pub struct Menu {
main_dishes: Vec<Dish>, main_dishes: Vec<Dish>,
side_dishes: Vec<Dish>, side_dishes: Vec<Dish>,
@ -11,47 +12,8 @@ pub struct Menu {
} }
impl Menu { impl Menu {
pub async fn new(day: NaiveDate, canteens: &[Canteen]) -> Result<Self, reqwest::Error> { pub async fn new(day: NaiveDate, canteen: Canteen) -> Result<Self> {
let mut main_dishes = Vec::new(); scrape_menu(canteen, day).await
let mut side_dishes = Vec::new();
let mut desserts = Vec::new();
for canteen in canteens.iter().copied() {
let (main, side, des) = scrape_menu(canteen, day).await?;
for dish in main {
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
main_dishes.push(dish);
}
}
for dish in side {
if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
side_dishes.push(dish);
}
}
for dish in des {
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
desserts.push(dish);
}
}
}
let compare_name = |a: &Dish, b: &Dish| a.get_name().cmp(b.get_name());
main_dishes.sort_by(compare_name);
side_dishes.sort_by(compare_name);
desserts.sort_by(compare_name);
Ok(Self {
main_dishes,
side_dishes,
desserts,
})
} }
pub fn get_main_dishes(&self) -> &[Dish] { pub fn get_main_dishes(&self) -> &[Dish] {
@ -65,12 +27,47 @@ impl Menu {
pub fn get_desserts(&self) -> &[Dish] { pub fn get_desserts(&self) -> &[Dish] {
&self.desserts &self.desserts
} }
pub fn merged(self, other: Self) -> Self {
let mut main_dishes = self.main_dishes;
let mut side_dishes = self.side_dishes;
let mut desserts = self.desserts;
for dish in other.main_dishes {
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
main_dishes.push(dish);
}
}
for dish in other.side_dishes {
if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
side_dishes.push(dish);
}
}
for dish in other.desserts {
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
desserts.push(dish);
}
} }
async fn scrape_menu( main_dishes.sort_by(|a, b| a.get_name().cmp(b.get_name()));
canteen: Canteen, side_dishes.sort_by(|a, b| a.get_name().cmp(b.get_name()));
day: NaiveDate, desserts.sort_by(|a, b| a.get_name().cmp(b.get_name()));
) -> Result<(Vec<Dish>, Vec<Dish>, Vec<Dish>), reqwest::Error> {
Self {
main_dishes,
side_dishes,
desserts,
}
}
}
async fn scrape_menu(canteen: Canteen, day: NaiveDate) -> Result<Menu> {
let url = canteen.get_url(); let url = canteen.get_url();
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let request_builder = client let request_builder = client
@ -84,7 +81,7 @@ async fn scrape_menu(
let html_main_dishes_selector = scraper::Selector::parse( let html_main_dishes_selector = scraper::Selector::parse(
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row", "table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
) )
.unwrap(); .map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_main_dishes = document.select(&html_main_dishes_selector); let html_main_dishes = document.select(&html_main_dishes_selector);
let main_dishes = html_main_dishes let main_dishes = html_main_dishes
.filter_map(|dish| Dish::from_element(dish, canteen)) .filter_map(|dish| Dish::from_element(dish, canteen))
@ -93,7 +90,7 @@ async fn scrape_menu(
let html_side_dishes_selector = scraper::Selector::parse( let html_side_dishes_selector = scraper::Selector::parse(
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row", "table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
) )
.unwrap(); .map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_side_dishes = document.select(&html_side_dishes_selector); let html_side_dishes = document.select(&html_side_dishes_selector);
let side_dishes = html_side_dishes let side_dishes = html_side_dishes
.filter_map(|dish| Dish::from_element(dish, canteen)) .filter_map(|dish| Dish::from_element(dish, canteen))
@ -102,11 +99,15 @@ async fn scrape_menu(
let html_desserts_selector = scraper::Selector::parse( let html_desserts_selector = scraper::Selector::parse(
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row", "table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
) )
.unwrap(); .map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_desserts = document.select(&html_desserts_selector); let html_desserts = document.select(&html_desserts_selector);
let desserts = html_desserts let desserts = html_desserts
.filter_map(|dish| Dish::from_element(dish, canteen)) .filter_map(|dish| Dish::from_element(dish, canteen))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
Ok((main_dishes, side_dishes, desserts)) Ok(Menu {
main_dishes,
side_dishes,
desserts,
})
} }