refresh canteens with missing entries
This commit is contained in:
parent
340258e461
commit
83026cfcac
|
|
@ -1,5 +1,6 @@
|
||||||
FROM rust:latest AS chef
|
FROM rust:latest AS chef
|
||||||
RUN cargo install cargo-chef
|
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
|
||||||
|
RUN cargo binstall cargo-chef -y
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
FROM chef AS planner
|
FROM chef AS planner
|
||||||
|
|
|
||||||
|
|
@ -4,11 +4,12 @@ mod menu;
|
||||||
mod refresh;
|
mod refresh;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
||||||
use std::{error::Error, fmt::Display};
|
use std::{collections::HashSet, error::Error, fmt::Display, sync::LazyLock};
|
||||||
|
|
||||||
pub use dish::Dish;
|
pub use dish::Dish;
|
||||||
pub use menu::scrape_menu;
|
pub use menu::scrape_menu;
|
||||||
pub use refresh::check_refresh;
|
pub use refresh::check_refresh;
|
||||||
|
use shared::Canteen;
|
||||||
pub use util::scrape_canteens_at_days;
|
pub use util::scrape_canteens_at_days;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
|
|
@ -33,3 +34,14 @@ impl From<String> for CustomError {
|
||||||
CustomError(s)
|
CustomError(s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub static FILTER_CANTEENS: LazyLock<HashSet<Canteen>> = LazyLock::new(|| {
|
||||||
|
std::env::var("FILTER_CANTEENS")
|
||||||
|
.ok()
|
||||||
|
.map(|s| {
|
||||||
|
s.split(',')
|
||||||
|
.filter_map(|el| el.parse::<Canteen>().ok())
|
||||||
|
.collect::<HashSet<_>>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default()
|
||||||
|
});
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
use std::{collections::HashSet, env};
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use chrono::{Duration, Utc};
|
use chrono::{Duration, Utc};
|
||||||
use itertools::Itertools as _;
|
use itertools::Itertools as _;
|
||||||
use mensa_upb_scraper::util;
|
use mensa_upb_scraper::{util, FILTER_CANTEENS};
|
||||||
use shared::Canteen;
|
use shared::Canteen;
|
||||||
use strum::IntoEnumIterator as _;
|
use strum::IntoEnumIterator as _;
|
||||||
|
|
||||||
|
|
@ -38,20 +38,11 @@ async fn main() -> Result<()> {
|
||||||
})
|
})
|
||||||
.collect::<HashSet<_>>();
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
let filter_canteens = env::var("FILTER_CANTEENS")
|
let date_canteen_combinations = (0..7)
|
||||||
.ok()
|
|
||||||
.map(|s| {
|
|
||||||
s.split(',')
|
|
||||||
.filter_map(|el| el.parse::<Canteen>().ok())
|
|
||||||
.collect::<HashSet<_>>()
|
|
||||||
})
|
|
||||||
.unwrap_or_default();
|
|
||||||
|
|
||||||
let date_canteen_combinations = (0..1)
|
|
||||||
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
|
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
|
||||||
.cartesian_product(Canteen::iter())
|
.cartesian_product(Canteen::iter())
|
||||||
.filter(|entry @ (_, canteen)| {
|
.filter(|entry @ (_, canteen)| {
|
||||||
!filter_canteens.contains(canteen) && !already_scraped.contains(entry)
|
!FILTER_CANTEENS.contains(canteen) && !already_scraped.contains(entry)
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,32 @@
|
||||||
use std::{collections::BTreeSet, str::FromStr};
|
use std::{
|
||||||
|
collections::{BTreeSet, HashSet},
|
||||||
|
str::FromStr,
|
||||||
|
sync::LazyLock,
|
||||||
|
};
|
||||||
|
|
||||||
use chrono::{NaiveDate, Utc};
|
use chrono::{NaiveDate, Utc};
|
||||||
|
use itertools::Itertools;
|
||||||
use shared::Canteen;
|
use shared::Canteen;
|
||||||
|
use strum::IntoEnumIterator as _;
|
||||||
|
|
||||||
use crate::util;
|
use crate::util;
|
||||||
|
|
||||||
|
static NON_FILTERED_CANTEENS: LazyLock<Vec<Canteen>> = LazyLock::new(|| {
|
||||||
|
let all_canteens = Canteen::iter().collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
all_canteens
|
||||||
|
.difference(&super::FILTER_CANTEENS)
|
||||||
|
.cloned()
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
});
|
||||||
|
|
||||||
|
#[tracing::instrument(skip(db))]
|
||||||
pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Canteen]) -> bool {
|
pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Canteen]) -> bool {
|
||||||
|
if date > Utc::now().date_naive() + chrono::Duration::days(7) {
|
||||||
|
tracing::debug!("Not refreshing menu for date {date} as it is too far in the future");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
let canteens_needing_refresh = match sqlx::query!(
|
let canteens_needing_refresh = match sqlx::query!(
|
||||||
r#"SELECT canteen, max(scraped_at) AS "scraped_at!" FROM canteens_scraped WHERE canteen = ANY($1) AND scraped_for = $2 GROUP BY canteen"#,
|
r#"SELECT canteen, max(scraped_at) AS "scraped_at!" FROM canteens_scraped WHERE canteen = ANY($1) AND scraped_for = $2 GROUP BY canteen"#,
|
||||||
&canteens
|
&canteens
|
||||||
|
|
@ -17,7 +38,14 @@ pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Cante
|
||||||
.fetch_all(db)
|
.fetch_all(db)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(v) => v.iter().filter_map(|r| if needs_refresh(r.scraped_at, date) { Some(Canteen::from_str(&r.canteen).expect("malformed db canteen entry")) } else { None }).collect::<BTreeSet<_>>(),
|
Ok(v) => v
|
||||||
|
.iter()
|
||||||
|
.map(|r| (Canteen::from_str(&r.canteen).expect("malformed db entry"), Some(r.scraped_at)))
|
||||||
|
.chain(NON_FILTERED_CANTEENS.iter().filter(|c| canteens.contains(c)).map(|c| (*c, None)))
|
||||||
|
.unique_by(|(c, _)| *c)
|
||||||
|
.filter(|(_, scraped_at)| scraped_at.is_none_or(|scraped_at| needs_refresh(scraped_at, date)))
|
||||||
|
.map(|(c, _)| c)
|
||||||
|
.collect::<BTreeSet<_>>(),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
tracing::error!("Error checking for existing scrapes: {}", err);
|
tracing::error!("Error checking for existing scrapes: {}", err);
|
||||||
return false;
|
return false;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
FROM rust:latest AS chef
|
FROM rust:latest AS chef
|
||||||
RUN cargo install cargo-chef
|
RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
|
||||||
|
RUN cargo binstall cargo-chef -y
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
FROM chef AS planner
|
FROM chef AS planner
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,8 @@ async fn main() -> Result<()> {
|
||||||
.with_default_directive(LevelFilter::WARN.into())
|
.with_default_directive(LevelFilter::WARN.into())
|
||||||
.from_env()
|
.from_env()
|
||||||
.expect("Invalid filter")
|
.expect("Invalid filter")
|
||||||
.add_directive("mensa_upb_api=debug".parse().unwrap());
|
.add_directive("mensa_upb_api=debug".parse().unwrap())
|
||||||
|
.add_directive("mensa_upb_scraper=debug".parse().unwrap());
|
||||||
tracing_subscriber::fmt().with_env_filter(env_filter).init();
|
tracing_subscriber::fmt().with_env_filter(env_filter).init();
|
||||||
|
|
||||||
match dotenvy::dotenv() {
|
match dotenvy::dotenv() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue