refresh canteens with missing entries
This commit is contained in:
parent
340258e461
commit
83026cfcac
|
|
@ -1,5 +1,6 @@
|
|||
FROM rust:latest AS chef
|
||||
RUN cargo install cargo-chef
|
||||
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
|
||||
RUN cargo binstall cargo-chef -y
|
||||
WORKDIR /app
|
||||
|
||||
FROM chef AS planner
|
||||
|
|
|
|||
|
|
@ -4,11 +4,12 @@ mod menu;
|
|||
mod refresh;
|
||||
pub mod util;
|
||||
|
||||
use std::{error::Error, fmt::Display};
|
||||
use std::{collections::HashSet, error::Error, fmt::Display, sync::LazyLock};
|
||||
|
||||
pub use dish::Dish;
|
||||
pub use menu::scrape_menu;
|
||||
pub use refresh::check_refresh;
|
||||
use shared::Canteen;
|
||||
pub use util::scrape_canteens_at_days;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -33,3 +34,14 @@ impl From<String> for CustomError {
|
|||
CustomError(s)
|
||||
}
|
||||
}
|
||||
|
||||
/// Canteens named in the `FILTER_CANTEENS` environment variable.
///
/// The variable is read once, on first access, and is interpreted as a
/// comma-separated list of canteen identifiers. Whitespace around each entry
/// is ignored, so `"a, b"` and `"a,b"` are equivalent. Entries that do not
/// parse as a [`Canteen`] are skipped, and an unset (or non-Unicode) variable
/// yields an empty set.
pub static FILTER_CANTEENS: LazyLock<HashSet<Canteen>> = LazyLock::new(|| {
    std::env::var("FILTER_CANTEENS")
        .ok()
        .map(|s| {
            s.split(',')
                // Trim first so a value such as "a, b" does not silently lose `b`.
                .filter_map(|el| el.trim().parse::<Canteen>().ok())
                .collect::<HashSet<_>>()
        })
        .unwrap_or_default()
});
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use std::{collections::HashSet, env};
|
||||
use std::collections::HashSet;
|
||||
|
||||
use anyhow::Result;
|
||||
use chrono::{Duration, Utc};
|
||||
use itertools::Itertools as _;
|
||||
use mensa_upb_scraper::util;
|
||||
use mensa_upb_scraper::{util, FILTER_CANTEENS};
|
||||
use shared::Canteen;
|
||||
use strum::IntoEnumIterator as _;
|
||||
|
||||
|
|
@ -38,20 +38,11 @@ async fn main() -> Result<()> {
|
|||
})
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let filter_canteens = env::var("FILTER_CANTEENS")
|
||||
.ok()
|
||||
.map(|s| {
|
||||
s.split(',')
|
||||
.filter_map(|el| el.parse::<Canteen>().ok())
|
||||
.collect::<HashSet<_>>()
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let date_canteen_combinations = (0..1)
|
||||
let date_canteen_combinations = (0..7)
|
||||
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
|
||||
.cartesian_product(Canteen::iter())
|
||||
.filter(|entry @ (_, canteen)| {
|
||||
!filter_canteens.contains(canteen) && !already_scraped.contains(entry)
|
||||
!FILTER_CANTEENS.contains(canteen) && !already_scraped.contains(entry)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,32 @@
|
|||
use std::{collections::BTreeSet, str::FromStr};
|
||||
use std::{
|
||||
collections::{BTreeSet, HashSet},
|
||||
str::FromStr,
|
||||
sync::LazyLock,
|
||||
};
|
||||
|
||||
use chrono::{NaiveDate, Utc};
|
||||
use itertools::Itertools;
|
||||
use shared::Canteen;
|
||||
use strum::IntoEnumIterator as _;
|
||||
|
||||
use crate::util;
|
||||
|
||||
static NON_FILTERED_CANTEENS: LazyLock<Vec<Canteen>> = LazyLock::new(|| {
|
||||
let all_canteens = Canteen::iter().collect::<HashSet<_>>();
|
||||
|
||||
all_canteens
|
||||
.difference(&super::FILTER_CANTEENS)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
|
||||
#[tracing::instrument(skip(db))]
|
||||
pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Canteen]) -> bool {
|
||||
if date > Utc::now().date_naive() + chrono::Duration::days(7) {
|
||||
tracing::debug!("Not refreshing menu for date {date} as it is too far in the future");
|
||||
return false;
|
||||
}
|
||||
|
||||
let canteens_needing_refresh = match sqlx::query!(
|
||||
r#"SELECT canteen, max(scraped_at) AS "scraped_at!" FROM canteens_scraped WHERE canteen = ANY($1) AND scraped_for = $2 GROUP BY canteen"#,
|
||||
&canteens
|
||||
|
|
@ -17,7 +38,14 @@ pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Cante
|
|||
.fetch_all(db)
|
||||
.await
|
||||
{
|
||||
Ok(v) => v.iter().filter_map(|r| if needs_refresh(r.scraped_at, date) { Some(Canteen::from_str(&r.canteen).expect("malformed db canteen entry")) } else { None }).collect::<BTreeSet<_>>(),
|
||||
Ok(v) => v
|
||||
.iter()
|
||||
.map(|r| (Canteen::from_str(&r.canteen).expect("malformed db entry"), Some(r.scraped_at)))
|
||||
.chain(NON_FILTERED_CANTEENS.iter().filter(|c| canteens.contains(c)).map(|c| (*c, None)))
|
||||
.unique_by(|(c, _)| *c)
|
||||
.filter(|(_, scraped_at)| scraped_at.is_none_or(|scraped_at| needs_refresh(scraped_at, date)))
|
||||
.map(|(c, _)| c)
|
||||
.collect::<BTreeSet<_>>(),
|
||||
Err(err) => {
|
||||
tracing::error!("Error checking for existing scrapes: {}", err);
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
|
||||
FROM rust:latest AS chef
|
||||
RUN cargo install cargo-chef
|
||||
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
|
||||
RUN cargo binstall cargo-chef -y
|
||||
WORKDIR /app
|
||||
|
||||
FROM chef AS planner
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@ async fn main() -> Result<()> {
|
|||
.with_default_directive(LevelFilter::WARN.into())
|
||||
.from_env()
|
||||
.expect("Invalid filter")
|
||||
.add_directive("mensa_upb_api=debug".parse().unwrap());
|
||||
.add_directive("mensa_upb_api=debug".parse().unwrap())
|
||||
.add_directive("mensa_upb_scraper=debug".parse().unwrap());
|
||||
tracing_subscriber::fmt().with_env_filter(env_filter).init();
|
||||
|
||||
match dotenvy::dotenv() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue