refresh canteens with missing entries

This commit is contained in:
Moritz Hölting 2025-12-17 12:52:03 +01:00
parent 340258e461
commit 83026cfcac
6 changed files with 53 additions and 19 deletions

View File

@ -1,5 +1,6 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
RUN cargo binstall cargo-chef -y
WORKDIR /app
FROM chef AS planner

View File

@ -4,11 +4,12 @@ mod menu;
mod refresh;
pub mod util;
use std::{error::Error, fmt::Display};
use std::{collections::HashSet, error::Error, fmt::Display, sync::LazyLock};
pub use dish::Dish;
pub use menu::scrape_menu;
pub use refresh::check_refresh;
use shared::Canteen;
pub use util::scrape_canteens_at_days;
#[derive(Debug, Clone)]
@ -33,3 +34,14 @@ impl From<String> for CustomError {
CustomError(s)
}
}
/// Set of canteens named in the `FILTER_CANTEENS` environment variable.
///
/// The variable is read once, on first access, and is interpreted as a
/// comma-separated list of values understood by `Canteen`'s `FromStr`
/// implementation. Whitespace surrounding each entry is ignored, so
/// `"a,b"` and `"a, b"` produce the same set.
///
/// Entries that fail to parse are skipped. If the variable is unset (or is
/// not valid Unicode) the set is empty.
pub static FILTER_CANTEENS: LazyLock<HashSet<Canteen>> = LazyLock::new(|| {
    std::env::var("FILTER_CANTEENS")
        .ok()
        .map(|raw| {
            raw.split(',')
                // Trim first: otherwise a space after a comma makes the entry
                // unparseable and it would be dropped without any notice.
                .filter_map(|entry| entry.trim().parse::<Canteen>().ok())
                .collect::<HashSet<_>>()
        })
        .unwrap_or_default()
});

View File

@ -1,9 +1,9 @@
use std::{collections::HashSet, env};
use std::collections::HashSet;
use anyhow::Result;
use chrono::{Duration, Utc};
use itertools::Itertools as _;
use mensa_upb_scraper::util;
use mensa_upb_scraper::{util, FILTER_CANTEENS};
use shared::Canteen;
use strum::IntoEnumIterator as _;
@ -38,20 +38,11 @@ async fn main() -> Result<()> {
})
.collect::<HashSet<_>>();
let filter_canteens = env::var("FILTER_CANTEENS")
.ok()
.map(|s| {
s.split(',')
.filter_map(|el| el.parse::<Canteen>().ok())
.collect::<HashSet<_>>()
})
.unwrap_or_default();
let date_canteen_combinations = (0..1)
let date_canteen_combinations = (0..7)
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
.cartesian_product(Canteen::iter())
.filter(|entry @ (_, canteen)| {
!filter_canteens.contains(canteen) && !already_scraped.contains(entry)
!FILTER_CANTEENS.contains(canteen) && !already_scraped.contains(entry)
})
.collect::<Vec<_>>();

View File

@ -1,11 +1,32 @@
use std::{collections::BTreeSet, str::FromStr};
use std::{
collections::{BTreeSet, HashSet},
str::FromStr,
sync::LazyLock,
};
use chrono::{NaiveDate, Utc};
use itertools::Itertools;
use shared::Canteen;
use strum::IntoEnumIterator as _;
use crate::util;
/// Every `Canteen` variant that is not contained in [`super::FILTER_CANTEENS`].
///
/// Computed once on first access. The elements come out of a `HashSet`
/// difference, so their order in the vector is unspecified.
static NON_FILTERED_CANTEENS: LazyLock<Vec<Canteen>> = LazyLock::new(|| {
    let every_canteen: HashSet<Canteen> = Canteen::iter().collect();
    let excluded: &HashSet<Canteen> = &super::FILTER_CANTEENS;
    every_canteen.difference(excluded).cloned().collect()
});
#[tracing::instrument(skip(db))]
pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Canteen]) -> bool {
if date > Utc::now().date_naive() + chrono::Duration::days(7) {
tracing::debug!("Not refreshing menu for date {date} as it is too far in the future");
return false;
}
let canteens_needing_refresh = match sqlx::query!(
r#"SELECT canteen, max(scraped_at) AS "scraped_at!" FROM canteens_scraped WHERE canteen = ANY($1) AND scraped_for = $2 GROUP BY canteen"#,
&canteens
@ -17,7 +38,14 @@ pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Cante
.fetch_all(db)
.await
{
Ok(v) => v.iter().filter_map(|r| if needs_refresh(r.scraped_at, date) { Some(Canteen::from_str(&r.canteen).expect("malformed db canteen entry")) } else { None }).collect::<BTreeSet<_>>(),
Ok(v) => v
.iter()
.map(|r| (Canteen::from_str(&r.canteen).expect("malformed db entry"), Some(r.scraped_at)))
.chain(NON_FILTERED_CANTEENS.iter().filter(|c| canteens.contains(c)).map(|c| (*c, None)))
.unique_by(|(c, _)| *c)
.filter(|(_, scraped_at)| scraped_at.is_none_or(|scraped_at| needs_refresh(scraped_at, date)))
.map(|(c, _)| c)
.collect::<BTreeSet<_>>(),
Err(err) => {
tracing::error!("Error checking for existing scrapes: {}", err);
return false;

View File

@ -1,6 +1,7 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
RUN cargo binstall cargo-chef -y
WORKDIR /app
FROM chef AS planner

View File

@ -19,7 +19,8 @@ async fn main() -> Result<()> {
.with_default_directive(LevelFilter::WARN.into())
.from_env()
.expect("Invalid filter")
.add_directive("mensa_upb_api=debug".parse().unwrap());
.add_directive("mensa_upb_api=debug".parse().unwrap())
.add_directive("mensa_upb_scraper=debug".parse().unwrap());
tracing_subscriber::fmt().with_env_filter(env_filter).init();
match dotenvy::dotenv() {