only update outdated db entries
This commit is contained in:
parent
8a1df4946d
commit
c56fe5a8a8
|
|
@ -0,0 +1,106 @@
|
|||
{
|
||||
"db_name": "PostgreSQL",
|
||||
"query": "SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS \"dish_type: DishType\", kjoules, proteins, carbohydrates, fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)",
|
||||
"describe": {
|
||||
"columns": [
|
||||
{
|
||||
"ordinal": 0,
|
||||
"name": "canteen",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 1,
|
||||
"name": "name",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 2,
|
||||
"name": "image_src",
|
||||
"type_info": "Text"
|
||||
},
|
||||
{
|
||||
"ordinal": 3,
|
||||
"name": "price_students",
|
||||
"type_info": "Numeric"
|
||||
},
|
||||
{
|
||||
"ordinal": 4,
|
||||
"name": "price_employees",
|
||||
"type_info": "Numeric"
|
||||
},
|
||||
{
|
||||
"ordinal": 5,
|
||||
"name": "price_guests",
|
||||
"type_info": "Numeric"
|
||||
},
|
||||
{
|
||||
"ordinal": 6,
|
||||
"name": "vegetarian",
|
||||
"type_info": "Bool"
|
||||
},
|
||||
{
|
||||
"ordinal": 7,
|
||||
"name": "vegan",
|
||||
"type_info": "Bool"
|
||||
},
|
||||
{
|
||||
"ordinal": 8,
|
||||
"name": "dish_type: DishType",
|
||||
"type_info": {
|
||||
"Custom": {
|
||||
"name": "dish_type_enum",
|
||||
"kind": {
|
||||
"Enum": [
|
||||
"main",
|
||||
"side",
|
||||
"dessert"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"ordinal": 9,
|
||||
"name": "kjoules",
|
||||
"type_info": "Int4"
|
||||
},
|
||||
{
|
||||
"ordinal": 10,
|
||||
"name": "proteins",
|
||||
"type_info": "Numeric"
|
||||
},
|
||||
{
|
||||
"ordinal": 11,
|
||||
"name": "carbohydrates",
|
||||
"type_info": "Numeric"
|
||||
},
|
||||
{
|
||||
"ordinal": 12,
|
||||
"name": "fats",
|
||||
"type_info": "Numeric"
|
||||
}
|
||||
],
|
||||
"parameters": {
|
||||
"Left": [
|
||||
"Date",
|
||||
"TextArray"
|
||||
]
|
||||
},
|
||||
"nullable": [
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true
|
||||
]
|
||||
},
|
||||
"hash": "87707bff13b4ce6ff47d2f79ee5d40b677042a20c217acc347ecdd04ebf3e6e0"
|
||||
}
|
||||
|
|
@ -1,11 +1,12 @@
|
|||
use std::sync::LazyLock;
|
||||
|
||||
use itertools::Itertools;
|
||||
use num_bigint::BigInt;
|
||||
use scraper::{ElementRef, Selector};
|
||||
use shared::DishType;
|
||||
use sqlx::types::BigDecimal;
|
||||
|
||||
use crate::util::normalize_price_bigdecimal;
|
||||
|
||||
static IMG_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".img img").expect("Failed to parse selector"));
|
||||
static HTML_PRICE_SELECTOR: LazyLock<Selector> =
|
||||
|
|
@ -15,19 +16,20 @@ static HTML_EXTRAS_SELECTOR: LazyLock<Selector> =
|
|||
static HTML_NUTRITIONS_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".nutritions > p").expect("Failed to parse selector"));
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Dish {
|
||||
name: String,
|
||||
image_src: Option<String>,
|
||||
price_students: BigDecimal,
|
||||
price_employees: BigDecimal,
|
||||
price_guests: BigDecimal,
|
||||
extras: Vec<String>,
|
||||
dish_type: DishType,
|
||||
pub name: String,
|
||||
pub image_src: Option<String>,
|
||||
pub price_students: BigDecimal,
|
||||
pub price_employees: BigDecimal,
|
||||
pub price_guests: BigDecimal,
|
||||
pub vegetarian: bool,
|
||||
pub vegan: bool,
|
||||
pub dish_type: DishType,
|
||||
pub nutrition_values: NutritionValues,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
|
||||
pub struct NutritionValues {
|
||||
pub kjoule: Option<i32>,
|
||||
pub protein: Option<BigDecimal>,
|
||||
|
|
@ -52,13 +54,10 @@ impl Dish {
|
|||
self.image_src.as_deref()
|
||||
}
|
||||
pub fn is_vegan(&self) -> bool {
|
||||
self.extras.contains(&"vegan".to_string())
|
||||
self.vegan
|
||||
}
|
||||
pub fn is_vegetarian(&self) -> bool {
|
||||
self.extras.contains(&"vegetarisch".to_string())
|
||||
}
|
||||
pub fn get_extras(&self) -> &[String] {
|
||||
&self.extras
|
||||
self.vegetarian
|
||||
}
|
||||
pub fn get_type(&self) -> DishType {
|
||||
self.dish_type
|
||||
|
|
@ -69,8 +68,9 @@ impl Dish {
|
|||
&& self.price_employees == other.price_employees
|
||||
&& self.price_guests == other.price_guests
|
||||
&& self.price_students == other.price_students
|
||||
&& self.extras.iter().sorted().collect_vec()
|
||||
== self.extras.iter().sorted().collect_vec()
|
||||
&& self.vegan == other.vegan
|
||||
&& self.vegetarian == other.vegetarian
|
||||
&& self.dish_type == other.dish_type
|
||||
}
|
||||
|
||||
pub fn from_element(
|
||||
|
|
@ -156,6 +156,8 @@ impl Dish {
|
|||
NutritionValues::default()
|
||||
};
|
||||
|
||||
let vegan = extras.contains(&"vegan".to_string());
|
||||
|
||||
Some(Self {
|
||||
name,
|
||||
image_src: img_src,
|
||||
|
|
@ -171,13 +173,25 @@ impl Dish {
|
|||
.iter_mut()
|
||||
.find(|(price_for, _)| price_for == "Gäste")
|
||||
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
|
||||
extras,
|
||||
vegetarian: vegan || extras.contains(&"vegetarisch".to_string()),
|
||||
vegan,
|
||||
dish_type,
|
||||
nutrition_values,
|
||||
nutrition_values: nutrition_values.normalize(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl NutritionValues {
|
||||
pub fn normalize(self) -> Self {
|
||||
Self {
|
||||
kjoule: self.kjoule,
|
||||
protein: self.protein.map(|p| p.with_prec(6).with_scale(2)),
|
||||
carbs: self.carbs.map(|c| c.with_prec(6).with_scale(2)),
|
||||
fat: self.fat.map(|f| f.with_prec(6).with_scale(2)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Dish {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
self.name.partial_cmp(&other.name)
|
||||
|
|
@ -185,7 +199,13 @@ impl PartialOrd for Dish {
|
|||
}
|
||||
|
||||
fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
|
||||
s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok())
|
||||
s.and_then(|p| {
|
||||
p.trim_end_matches(" €")
|
||||
.replace(',', ".")
|
||||
.parse::<BigDecimal>()
|
||||
.ok()
|
||||
})
|
||||
.map(normalize_price_bigdecimal)
|
||||
.unwrap_or_else(|| BigDecimal::from_bigint(BigInt::from(99999), 2))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ pub use dish::Dish;
|
|||
pub use menu::scrape_menu;
|
||||
pub use refresh::check_refresh;
|
||||
use shared::Canteen;
|
||||
pub use util::scrape_canteens_at_days;
|
||||
pub use util::scrape_canteens_at_days_and_insert;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct CustomError(String);
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ async fn main() -> Result<()> {
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
util::scrape_canteens_at_days(&db, &date_canteen_combinations).await?;
|
||||
util::scrape_canteens_at_days_and_insert(&db, &date_canteen_combinations).await?;
|
||||
|
||||
tracing::info!("Finished scraping menu");
|
||||
|
||||
|
|
|
|||
|
|
@ -5,11 +5,17 @@ use std::{
|
|||
};
|
||||
|
||||
use chrono::{NaiveDate, Utc};
|
||||
use futures::{StreamExt, TryStreamExt as _};
|
||||
use itertools::Itertools;
|
||||
use shared::Canteen;
|
||||
use shared::{Canteen, DishType};
|
||||
use sqlx::QueryBuilder;
|
||||
use strum::IntoEnumIterator as _;
|
||||
|
||||
use crate::util;
|
||||
use crate::{
|
||||
dish::NutritionValues,
|
||||
util::{self, add_menu_to_db, normalize_price_bigdecimal},
|
||||
Dish,
|
||||
};
|
||||
|
||||
static NON_FILTERED_CANTEENS: LazyLock<Vec<Canteen>> = LazyLock::new(|| {
|
||||
let all_canteens = Canteen::iter().collect::<HashSet<_>>();
|
||||
|
|
@ -61,21 +67,71 @@ pub async fn check_refresh(db: &sqlx::PgPool, date: NaiveDate, canteens: &[Cante
|
|||
canteens_needing_refresh
|
||||
);
|
||||
|
||||
if let Err(err) = util::scrape_canteens_at_days(
|
||||
db,
|
||||
&canteens_needing_refresh
|
||||
let canteen_date_pairs = canteens_needing_refresh
|
||||
.iter()
|
||||
.map(|c| (date, *c))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::error!("Error during refresh scrape: {}", err);
|
||||
return false;
|
||||
}
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let scraped_dishes = util::scrape_canteens_at_days(&canteen_date_pairs)
|
||||
.filter_map(|res| async move { res.ok() })
|
||||
.flat_map(|(_, canteen, menu)| {
|
||||
futures::stream::iter(menu).map(move |dish| (canteen, dish))
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let db_data = sqlx::query!(
|
||||
r#"SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS "dish_type: DishType", kjoules, proteins, carbohydrates, fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)"#,
|
||||
date,
|
||||
&canteens_needing_refresh
|
||||
.iter()
|
||||
.map(|c| c.get_identifier().to_string())
|
||||
.collect::<Vec<_>>(),
|
||||
).map(|r| {
|
||||
(
|
||||
Canteen::from_str(&r.canteen).expect("malformed db entry") ,
|
||||
Dish {
|
||||
name: r.name,
|
||||
image_src: r.image_src,
|
||||
price_students: normalize_price_bigdecimal(r.price_students),
|
||||
price_employees: normalize_price_bigdecimal(r.price_employees),
|
||||
price_guests: normalize_price_bigdecimal(r.price_guests),
|
||||
vegetarian: r.vegetarian,
|
||||
vegan: r.vegan,
|
||||
dish_type: r.dish_type,
|
||||
nutrition_values: NutritionValues {
|
||||
kjoule: r.kjoules,
|
||||
protein: r.proteins,
|
||||
carbs: r.carbohydrates,
|
||||
fat: r.fats,
|
||||
}.normalize(),
|
||||
}
|
||||
)
|
||||
}).fetch(db).try_collect::<HashSet<_>>();
|
||||
|
||||
let (scraped_dishes, db_data) = futures::join!(scraped_dishes, db_data);
|
||||
|
||||
match db_data {
|
||||
Ok(db_dishes) => {
|
||||
let stale_dishes = db_dishes
|
||||
.difference(&scraped_dishes)
|
||||
.collect::<HashSet<_>>();
|
||||
let new_dishes = scraped_dishes
|
||||
.difference(&db_dishes)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
if let Err(err) = update_stale_dishes(db, date, &stale_dishes, &new_dishes).await {
|
||||
tracing::error!("Error updating stale dishes in db: {}", err);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!("Error fetching existing dishes from db: {}", err);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn needs_refresh(last_refreshed: chrono::DateTime<Utc>, date_entry: chrono::NaiveDate) -> bool {
|
||||
|
|
@ -89,3 +145,44 @@ fn needs_refresh(last_refreshed: chrono::DateTime<Utc>, date_entry: chrono::Naiv
|
|||
now.signed_duration_since(last_refreshed) >= chrono::Duration::days(2)
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_stale_dishes(
|
||||
db: &sqlx::PgPool,
|
||||
date: NaiveDate,
|
||||
stale_dishes: &HashSet<&(Canteen, Dish)>,
|
||||
new_dishes: &HashSet<&(Canteen, Dish)>,
|
||||
) -> Result<(), sqlx::Error> {
|
||||
let mut tx = db.begin().await?;
|
||||
|
||||
QueryBuilder::new("UPDATE meals SET is_latest = FALSE WHERE date = ")
|
||||
.push_bind(date)
|
||||
.push(r#" AND ("name", canteen) IN "#)
|
||||
.push_tuples(stale_dishes, |mut sep, (canteen, dish)| {
|
||||
sep.push_bind(&dish.name)
|
||||
.push_bind(canteen.get_identifier());
|
||||
})
|
||||
.push(";")
|
||||
.build()
|
||||
.execute(&mut *tx)
|
||||
.await?;
|
||||
|
||||
let chunks = new_dishes
|
||||
.iter()
|
||||
.sorted_by_key(|(c, _)| c)
|
||||
.chunk_by(|(c, _)| c);
|
||||
|
||||
let new_dishes_iter = chunks.into_iter().map(|(canteen, g)| {
|
||||
(
|
||||
*canteen,
|
||||
g.map(|(_, dish)| dish).cloned().collect::<Vec<_>>(),
|
||||
)
|
||||
});
|
||||
|
||||
for (canteen, menu) in new_dishes_iter {
|
||||
add_menu_to_db(&mut tx, &date, canteen, menu).await?;
|
||||
}
|
||||
|
||||
tx.commit().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ use std::env;
|
|||
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDate;
|
||||
use futures::StreamExt as _;
|
||||
use futures::{Stream, StreamExt as _};
|
||||
use shared::{Canteen, DishType};
|
||||
use sqlx::{postgres::PgPoolOptions, PgPool, PgTransaction};
|
||||
use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool, PgTransaction};
|
||||
|
||||
use crate::{scrape_menu, Dish};
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ pub fn get_db() -> Result<PgPool> {
|
|||
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?)
|
||||
}
|
||||
|
||||
pub async fn scrape_canteens_at_days(
|
||||
pub async fn scrape_canteens_at_days_and_insert(
|
||||
db: &PgPool,
|
||||
date_canteen_combinations: &[(NaiveDate, Canteen)],
|
||||
) -> Result<()> {
|
||||
|
|
@ -40,26 +40,45 @@ pub async fn scrape_canteens_at_days(
|
|||
transaction.commit().await
|
||||
});
|
||||
|
||||
futures::stream::iter(date_canteen_combinations)
|
||||
.then(|(date, canteen)| async move { (*date, *canteen, scrape_menu(date, *canteen).await) })
|
||||
.filter_map(
|
||||
|(date, canteen, menu)| async move { menu.ok().map(|menu| (date, canteen, menu)) },
|
||||
)
|
||||
.for_each(|(date, canteen, menu)| {
|
||||
let errs = scrape_canteens_at_days(date_canteen_combinations)
|
||||
.then(|res| {
|
||||
let tx = tx.clone();
|
||||
async move {
|
||||
match res {
|
||||
Ok((date, canteen, menu)) => {
|
||||
tx.send((date, canteen, menu)).await.ok();
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!("Error scraping menu: {err}");
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.await;
|
||||
|
||||
drop(tx);
|
||||
|
||||
insert_handle.await??;
|
||||
|
||||
if let Some(err) = errs.into_iter().find_map(Result::err) {
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn scrape_canteens_at_days<'a>(
|
||||
date_canteen_combinations: &'a [(NaiveDate, Canteen)],
|
||||
) -> impl Stream<Item = Result<(NaiveDate, Canteen, Vec<Dish>)>> + 'a {
|
||||
futures::stream::iter(date_canteen_combinations).then(|(date, canteen)| async move {
|
||||
scrape_menu(date, *canteen)
|
||||
.await
|
||||
.map(|menu| (*date, *canteen, menu))
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn add_menu_to_db(
|
||||
db: &mut PgTransaction<'_>,
|
||||
date: &NaiveDate,
|
||||
|
|
@ -107,3 +126,7 @@ pub async fn add_menu_to_db(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Brings a price to the common representation used when comparing dishes:
/// `with_prec(6)` first, then `with_scale(2)`.
pub fn normalize_price_bigdecimal(price: BigDecimal) -> BigDecimal {
    let precision_limited = price.with_prec(6);
    precision_limited.with_scale(2)
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ use std::fmt::Display;
|
|||
mod canteen;
|
||||
pub use canteen::Canteen;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, sqlx::Type)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, sqlx::Type)]
|
||||
#[sqlx(type_name = "dish_type_enum")]
|
||||
#[sqlx(rename_all = "lowercase")]
|
||||
pub enum DishType {
|
||||
|
|
|
|||
Loading…
Reference in New Issue