scrape nutritional values
This commit is contained in:
parent
4729c1afab
commit
8e3dd731c5
|
|
@ -0,0 +1,15 @@
|
|||
-- Down migration: removes the nutritional-value columns from `meals`.

-- The view is dropped first so that no view definition still references the
-- columns removed below.
-- NOTE(review): if an earlier migration already defined `meals_view`, it should be
-- recreated here in its previous shape instead of only being dropped — confirm.
DROP VIEW IF EXISTS meals_view;

-- One statement for all four columns; IF EXISTS keeps the script re-runnable
-- even when some of the columns are already gone.
ALTER TABLE meals
    DROP COLUMN IF EXISTS kjoules,
    DROP COLUMN IF EXISTS proteins,
    DROP COLUMN IF EXISTS carbohydrates,
    DROP COLUMN IF EXISTS fats;
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
-- Up migration: adds nutritional-value columns to `meals` and exposes them
-- through `meals_view`.

-- No NOT NULL / DEFAULT is declared, so the new columns are nullable.
ALTER TABLE meals
    ADD COLUMN kjoules INT,
    ADD COLUMN proteins NUMERIC(6,2),
    ADD COLUMN carbohydrates NUMERIC(6,2),
    ADD COLUMN fats NUMERIC(6,2);

-- Rows flagged `is_latest`, with an extra `kcal` column derived from the
-- stored kilojoule value (divided by 4.184 and rounded).
CREATE OR REPLACE VIEW meals_view AS
SELECT
    id,
    date,
    canteen,
    name,
    dish_type,
    image_src,
    price_students,
    price_employees,
    price_guests,
    vegan,
    vegetarian,
    kjoules,
    proteins,
    carbohydrates,
    fats,
    round(kjoules / 4.184) AS kcal
FROM meals
WHERE is_latest = TRUE;
|
||||
|
|
@ -1,30 +1,52 @@
|
|||
use std::sync::LazyLock;
|
||||
|
||||
use itertools::Itertools;
|
||||
use scraper::ElementRef;
|
||||
use num_bigint::BigInt;
|
||||
use scraper::{ElementRef, Selector};
|
||||
use shared::DishType;
|
||||
use sqlx::types::BigDecimal;
|
||||
|
||||
static IMG_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".img img").expect("Failed to parse selector"));
|
||||
static HTML_PRICE_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".desc .price").expect("Failed to parse selector"));
|
||||
static HTML_EXTRAS_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".desc .buttons > *").expect("Failed to parse selector"));
|
||||
static HTML_NUTRITIONS_SELECTOR: LazyLock<Selector> =
|
||||
LazyLock::new(|| Selector::parse(".nutritions > p").expect("Failed to parse selector"));
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Dish {
|
||||
name: String,
|
||||
image_src: Option<String>,
|
||||
price_students: Option<String>,
|
||||
price_employees: Option<String>,
|
||||
price_guests: Option<String>,
|
||||
price_students: BigDecimal,
|
||||
price_employees: BigDecimal,
|
||||
price_guests: BigDecimal,
|
||||
extras: Vec<String>,
|
||||
dish_type: DishType,
|
||||
pub nutrition_values: NutritionValues,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub struct NutritionValues {
|
||||
pub kjoule: Option<i64>,
|
||||
pub protein: Option<BigDecimal>,
|
||||
pub carbs: Option<BigDecimal>,
|
||||
pub fat: Option<BigDecimal>,
|
||||
}
|
||||
|
||||
impl Dish {
|
||||
pub fn get_name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
pub fn get_price_students(&self) -> Option<&str> {
|
||||
self.price_students.as_deref()
|
||||
pub fn get_price_students(&self) -> &BigDecimal {
|
||||
&self.price_students
|
||||
}
|
||||
pub fn get_price_employees(&self) -> Option<&str> {
|
||||
self.price_employees.as_deref()
|
||||
pub fn get_price_employees(&self) -> &BigDecimal {
|
||||
&self.price_employees
|
||||
}
|
||||
pub fn get_price_guests(&self) -> Option<&str> {
|
||||
self.price_guests.as_deref()
|
||||
pub fn get_price_guests(&self) -> &BigDecimal {
|
||||
&self.price_guests
|
||||
}
|
||||
pub fn get_image_src(&self) -> Option<&str> {
|
||||
self.image_src.as_deref()
|
||||
|
|
@ -51,8 +73,12 @@ impl Dish {
|
|||
== self.extras.iter().sorted().collect_vec()
|
||||
}
|
||||
|
||||
pub fn from_element(element: ElementRef, dish_type: DishType) -> Option<Self> {
|
||||
let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
|
||||
pub fn from_element(
|
||||
element: ElementRef,
|
||||
details: ElementRef,
|
||||
dish_type: DishType,
|
||||
) -> Option<Self> {
|
||||
let html_name_selector = Selector::parse(".desc h4").ok()?;
|
||||
let name = element
|
||||
.select(&html_name_selector)
|
||||
.next()?
|
||||
|
|
@ -62,16 +88,14 @@ impl Dish {
|
|||
.trim()
|
||||
.to_string();
|
||||
|
||||
let img_selector = scraper::Selector::parse(".img img").ok()?;
|
||||
let img_src = element.select(&img_selector).next().and_then(|el| {
|
||||
let img_src = element.select(&IMG_SELECTOR).next().and_then(|el| {
|
||||
el.value()
|
||||
.attr("src")
|
||||
.map(|img_src_path| format!("https://www.studierendenwerk-pb.de/{}", img_src_path))
|
||||
});
|
||||
|
||||
let html_price_selector = scraper::Selector::parse(".desc .price").ok()?;
|
||||
let mut prices = element
|
||||
.select(&html_price_selector)
|
||||
.select(&HTML_PRICE_SELECTOR)
|
||||
.filter_map(|price| {
|
||||
let price_for = price.first_child().and_then(|strong| {
|
||||
strong.first_child().and_then(|text_element| {
|
||||
|
|
@ -92,29 +116,64 @@ impl Dish {
|
|||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?;
|
||||
let extras = element
|
||||
.select(&html_extras_selector)
|
||||
.select(&HTML_EXTRAS_SELECTOR)
|
||||
.filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let nutritions_element = details.select(&HTML_NUTRITIONS_SELECTOR).next();
|
||||
let nutrition_values = if let Some(nutritions_element) = nutritions_element {
|
||||
let mut kjoule = None;
|
||||
let mut protein = None;
|
||||
let mut carbs = None;
|
||||
let mut fat = None;
|
||||
|
||||
for s in nutritions_element.text() {
|
||||
let s = s.trim();
|
||||
if !s.is_empty() {
|
||||
if let Some(rest) = s.strip_prefix("Brennwert = ") {
|
||||
kjoule = rest
|
||||
.split_whitespace()
|
||||
.next()
|
||||
.and_then(|num_str| num_str.parse().ok());
|
||||
} else if let Some(rest) = s.strip_prefix("Eiweiß = ") {
|
||||
protein = grams_to_bigdecimal(rest);
|
||||
} else if let Some(rest) = s.strip_prefix("Kohlenhydrate = ") {
|
||||
carbs = grams_to_bigdecimal(rest);
|
||||
} else if let Some(rest) = s.strip_prefix("Fett = ") {
|
||||
fat = grams_to_bigdecimal(rest);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
NutritionValues {
|
||||
kjoule,
|
||||
protein,
|
||||
carbs,
|
||||
fat,
|
||||
}
|
||||
} else {
|
||||
NutritionValues::default()
|
||||
};
|
||||
|
||||
Some(Self {
|
||||
name,
|
||||
image_src: img_src,
|
||||
price_students: prices
|
||||
.iter_mut()
|
||||
.find(|(price_for, _)| price_for == "Studierende")
|
||||
.map(|(_, price)| std::mem::take(price)),
|
||||
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
|
||||
price_employees: prices
|
||||
.iter_mut()
|
||||
.find(|(price_for, _)| price_for == "Bedienstete")
|
||||
.map(|(_, price)| std::mem::take(price)),
|
||||
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
|
||||
price_guests: prices
|
||||
.iter_mut()
|
||||
.find(|(price_for, _)| price_for == "Gäste")
|
||||
.map(|(_, price)| std::mem::take(price)),
|
||||
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
|
||||
extras,
|
||||
dish_type,
|
||||
nutrition_values,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -124,3 +183,16 @@ impl PartialOrd for Dish {
|
|||
self.name.partial_cmp(&other.name)
|
||||
}
|
||||
}
|
||||
|
||||
fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
|
||||
s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok())
|
||||
.unwrap_or_else(|| BigDecimal::from_bigint(BigInt::from(99999), 2))
|
||||
}
|
||||
|
||||
fn grams_to_bigdecimal(s: &str) -> Option<BigDecimal> {
|
||||
s.trim_end_matches("g")
|
||||
.replace(',', ".")
|
||||
.trim()
|
||||
.parse()
|
||||
.ok()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ async fn main() -> Result<()> {
|
|||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let date_canteen_combinations = (0..7)
|
||||
let date_canteen_combinations = (0..1)
|
||||
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
|
||||
.cartesian_product(Canteen::iter())
|
||||
.filter(|entry @ (_, canteen)| {
|
||||
|
|
|
|||
|
|
@ -1,9 +1,22 @@
|
|||
use std::sync::LazyLock;
|
||||
|
||||
use anyhow::Result;
|
||||
use chrono::NaiveDate;
|
||||
use scraper::{Html, Selector};
|
||||
use shared::{Canteen, DishType};
|
||||
|
||||
use crate::{canteen::CanteenExt as _, CustomError, Dish};
|
||||
|
||||
static HTML_MAIN_DISHES_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
|
||||
Selector::parse("table.table-dishes.main-dishes > tbody").expect("Failed to parse selector")
|
||||
});
|
||||
static HTML_SIDE_DISHES_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
|
||||
Selector::parse("table.table-dishes.side-dishes > tbody").expect("Failed to parse selector")
|
||||
});
|
||||
static HTML_DESSERTS_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
|
||||
Selector::parse("table.table-dishes.soups > tbody").expect("Failed to parse selector")
|
||||
});
|
||||
|
||||
#[tracing::instrument]
|
||||
pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>> {
|
||||
tracing::debug!("Starting scraping");
|
||||
|
|
@ -19,39 +32,41 @@ pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>
|
|||
|
||||
let document = scraper::Html::parse_document(&html_content);
|
||||
|
||||
let html_main_dishes_selector = scraper::Selector::parse(
|
||||
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
|
||||
)
|
||||
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||
let html_main_dishes = document.select(&html_main_dishes_selector);
|
||||
let main_dishes = html_main_dishes
|
||||
.filter_map(|dish| Dish::from_element(dish, DishType::Main))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let html_side_dishes_selector = scraper::Selector::parse(
|
||||
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
|
||||
)
|
||||
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||
let html_side_dishes = document.select(&html_side_dishes_selector);
|
||||
let side_dishes = html_side_dishes
|
||||
.filter_map(|dish| Dish::from_element(dish, DishType::Side))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let html_desserts_selector = scraper::Selector::parse(
|
||||
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
|
||||
)
|
||||
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||
let html_desserts = document.select(&html_desserts_selector);
|
||||
let desserts = html_desserts
|
||||
.filter_map(|dish| Dish::from_element(dish, DishType::Dessert))
|
||||
.collect::<Vec<_>>();
|
||||
let main_dishes = scrape_category(&document, &HTML_MAIN_DISHES_TBODY_SELECTOR, DishType::Main)?;
|
||||
let side_dishes = scrape_category(&document, &HTML_SIDE_DISHES_TBODY_SELECTOR, DishType::Side)?;
|
||||
let desserts = scrape_category(&document, &HTML_DESSERTS_TBODY_SELECTOR, DishType::Dessert)?;
|
||||
|
||||
let mut res = Vec::new();
|
||||
res.extend(main_dishes);
|
||||
res.extend(side_dishes);
|
||||
res.extend(desserts);
|
||||
|
||||
dbg!(&res);
|
||||
|
||||
tracing::debug!("Finished scraping");
|
||||
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
static ITEM_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
|
||||
Selector::parse("tr.odd > td.description > div.row").expect("Failed to parse selector")
|
||||
});
|
||||
static ITEM_DETAILS_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
|
||||
Selector::parse("tr.even > td.more > div.ingredients-list").expect("Failed to parse selector")
|
||||
});
|
||||
|
||||
fn scrape_category<'a>(
|
||||
document: &'a Html,
|
||||
tbody_selector: &Selector,
|
||||
dish_type: DishType,
|
||||
) -> Result<impl Iterator<Item = Dish> + 'a> {
|
||||
let tbody = document.select(tbody_selector).next().ok_or_else(|| {
|
||||
CustomError::from(format!("No tbody found for selector: {:?}", tbody_selector))
|
||||
})?;
|
||||
let dishes = tbody.select(&ITEM_SELECTOR);
|
||||
let dish_details = tbody.select(&ITEM_DETAILS_SELECTOR);
|
||||
|
||||
Ok(dishes
|
||||
.zip(dish_details)
|
||||
.filter_map(move |(dish, details)| Dish::from_element(dish, details, dish_type)))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@ use std::env;
|
|||
use anyhow::Result;
|
||||
use chrono::NaiveDate;
|
||||
use futures::StreamExt as _;
|
||||
use num_bigint::BigInt;
|
||||
use shared::{Canteen, DishType};
|
||||
use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool, PgTransaction};
|
||||
use sqlx::{postgres::PgPoolOptions, PgPool, PgTransaction};
|
||||
|
||||
use crate::{scrape_menu, Dish};
|
||||
|
||||
|
|
@ -71,7 +70,7 @@ pub async fn add_menu_to_db(
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
let mut query = sqlx::QueryBuilder::new("INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian) ");
|
||||
let mut query = sqlx::QueryBuilder::new("INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian,kjoules,proteins,carbohydrates,fats) ");
|
||||
|
||||
query
|
||||
.push_values(menu, |mut sep, item| {
|
||||
|
|
@ -82,11 +81,15 @@ pub async fn add_menu_to_db(
|
|||
.push_bind(item.get_name().to_string())
|
||||
.push_bind(item.get_type() as DishType)
|
||||
.push_bind(item.get_image_src().map(str::to_string))
|
||||
.push_bind(price_to_bigdecimal(item.get_price_students()))
|
||||
.push_bind(price_to_bigdecimal(item.get_price_employees()))
|
||||
.push_bind(price_to_bigdecimal(item.get_price_guests()))
|
||||
.push_bind(item.get_price_students().to_owned())
|
||||
.push_bind(item.get_price_employees().to_owned())
|
||||
.push_bind(item.get_price_guests().to_owned())
|
||||
.push_bind(vegan)
|
||||
.push_bind(vegan || item.is_vegetarian());
|
||||
.push_bind(vegan || item.is_vegetarian())
|
||||
.push_bind(item.nutrition_values.kjoule)
|
||||
.push_bind(item.nutrition_values.protein.to_owned())
|
||||
.push_bind(item.nutrition_values.carbs.to_owned())
|
||||
.push_bind(item.nutrition_values.fat.to_owned());
|
||||
})
|
||||
.build()
|
||||
.execute(&mut **db)
|
||||
|
|
@ -104,8 +107,3 @@ pub async fn add_menu_to_db(
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
|
||||
s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok())
|
||||
.unwrap_or_else(|| BigDecimal::from_bigint(BigInt::from(99999), 2))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue