mensa-upb-api/scraper/src/main.rs

64 lines
1.6 KiB
Rust

use std::{collections::HashSet, env};
use anyhow::Result;
use chrono::{Duration, Utc};
use itertools::Itertools as _;
use mensa_upb_scraper::util;
use shared::Canteen;
use strum::IntoEnumIterator as _;
#[tokio::main]
async fn main() -> Result<()> {
dotenvy::dotenv().ok();
let db = util::get_db()?;
tracing_subscriber::fmt::init();
sqlx::migrate!("../migrations").run(&db).await?;
tracing::info!("Starting up...");
let start_date = Utc::now().date_naive();
let end_date = (Utc::now() + Duration::days(6)).date_naive();
let already_scraped = sqlx::query!(
"SELECT DISTINCT scraped_for, canteen FROM canteens_scraped WHERE scraped_for >= $1 AND scraped_for <= $2",
start_date,
end_date
)
.fetch_all(&db)
.await?
.into_iter()
.map(|r| {
(
r.scraped_for,
r.canteen.parse::<Canteen>().expect("Invalid db entry"),
)
})
.collect::<HashSet<_>>();
let filter_canteens = env::var("FILTER_CANTEENS")
.ok()
.map(|s| {
s.split(',')
.filter_map(|el| el.parse::<Canteen>().ok())
.collect::<HashSet<_>>()
})
.unwrap_or_default();
let date_canteen_combinations = (0..1)
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
.cartesian_product(Canteen::iter())
.filter(|entry @ (_, canteen)| {
!filter_canteens.contains(canteen) && !already_scraped.contains(entry)
})
.collect::<Vec<_>>();
util::scrape_canteens_at_days(&db, &date_canteen_combinations).await?;
tracing::info!("Finished scraping menu");
Ok(())
}