make scraper work on new canteen website

This commit is contained in:
Moritz Hölting 2026-03-07 16:14:18 +01:00
parent 675b5bb7db
commit eec24bd287
19 changed files with 434 additions and 539 deletions

View File

@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS \"dish_type: DishType\", kjoules, proteins, carbohydrates, fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)",
"query": "SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS \"dish_type: DishType\", kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)",
"describe": {
"columns": [
{
@ -53,7 +53,9 @@
"Enum": [
"main",
"side",
"dessert"
"soup",
"dessert",
"other"
]
}
}
@ -78,6 +80,11 @@
"ordinal": 12,
"name": "fats",
"type_info": "Numeric"
},
{
"ordinal": 13,
"name": "saturated_fats",
"type_info": "Numeric"
}
],
"parameters": {
@ -99,8 +106,9 @@
true,
true,
true,
true,
true
]
},
"hash": "87707bff13b4ce6ff47d2f79ee5d40b677042a20c217acc347ecdd04ebf3e6e0"
"hash": "a640594c466fb9386a26a1273cc80eb560abee2849e47cc3584a09cc93e1137f"
}

View File

@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "SELECT kjoules, proteins, carbohydrates, fats FROM meals m WHERE is_latest = TRUE AND LOWER(\"name\") = $1 AND date = $2 LIMIT 1;",
"query": "SELECT kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals m WHERE is_latest = TRUE AND LOWER(\"name\") = $1 AND date = $2 LIMIT 1;",
"describe": {
"columns": [
{
@ -22,6 +22,11 @@
"ordinal": 3,
"name": "fats",
"type_info": "Numeric"
},
{
"ordinal": 4,
"name": "saturated_fats",
"type_info": "Numeric"
}
],
"parameters": {
@ -34,8 +39,9 @@
true,
true,
true,
true,
true
]
},
"hash": "d7d20b101fbed8dfe7ff33ac7a6a0e4cddfaa36050c5482818b6ee4783f8173d"
"hash": "b2f0546894868db6c426df5c22e966c74b3ecfc8474c0cce4a8a03269823ae67"
}

View File

@ -1,6 +1,6 @@
{
"db_name": "PostgreSQL",
"query": "SELECT kjoules, proteins, carbohydrates, fats FROM meals m WHERE is_latest = TRUE AND LOWER(\"name\") = $1 ORDER BY date DESC LIMIT 1;",
"query": "SELECT kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals m WHERE is_latest = TRUE AND LOWER(\"name\") = $1 ORDER BY date DESC LIMIT 1;",
"describe": {
"columns": [
{
@ -22,6 +22,11 @@
"ordinal": 3,
"name": "fats",
"type_info": "Numeric"
},
{
"ordinal": 4,
"name": "saturated_fats",
"type_info": "Numeric"
}
],
"parameters": {
@ -33,8 +38,9 @@
true,
true,
true,
true,
true
]
},
"hash": "a08588e594190460891c0e545b9983594e837f8da93db0d7c03e8ef16b9d0e3b"
"hash": "d326a7a0b870b2b1d954d389df145d49628f0ef3684ca6c98a44f8523ca30152"
}

View File

@ -23,7 +23,9 @@
"Enum": [
"main",
"side",
"dessert"
"soup",
"dessert",
"other"
]
}
}

323
Cargo.lock generated
View File

@ -586,26 +586,6 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8"
[[package]]
name = "const_format"
version = "0.2.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
dependencies = [
"const_format_proc_macros",
]
[[package]]
name = "const_format_proc_macros"
version = "0.2.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "convert_case"
version = "0.10.0"
@ -690,29 +670,6 @@ dependencies = [
"typenum",
]
[[package]]
name = "cssparser"
version = "0.36.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn 2.0.113",
]
[[package]]
name = "dashmap"
version = "6.1.0"
@ -799,27 +756,6 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "dtoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]]
name = "ego-tree"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
[[package]]
name = "either"
version = "1.15.0"
@ -877,10 +813,15 @@ dependencies = [
]
[[package]]
name = "fastrand"
version = "2.3.0"
name = "extend"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
checksum = "311a6d2f1f9d60bff73d2c78a0af97ed27f79672f15c238192a5bbb64db56d00"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.113",
]
[[package]]
name = "find-msvc-tools"
@ -942,16 +883,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]]
name = "futures"
version = "0.3.31"
@ -1068,15 +999,6 @@ dependencies = [
"version_check",
]
[[package]]
name = "getopts"
version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.16"
@ -1251,13 +1173,12 @@ dependencies = [
]
[[package]]
name = "html5ever"
version = "0.36.1"
name = "html-escape"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6452c4751a24e1b99c3260d505eaeee76a050573e61f30ac2c924ddc7236f01e"
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
dependencies = [
"log",
"markup5ever",
"utf8-width",
]
[[package]]
@ -1586,6 +1507,29 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4345964bb142484797b161f473a503a434de77149dd8c7427788c6e13379388"
[[package]]
name = "lazy-regex"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bae91019476d3ec7147de9aa291cadb6d870abf2f3015d2da73a90325ac1496"
dependencies = [
"lazy-regex-proc_macros",
"once_cell",
"regex",
]
[[package]]
name = "lazy-regex-proc_macros"
version = "3.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4de9c1e1439d8b7b3061b2d209809f447ca33241733d9a3c01eabf2dc8d94358"
dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.113",
]
[[package]]
name = "lazy_static"
version = "1.5.0"
@ -1672,23 +1616,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c3294c4d74d0742910f8c7b466f44dda9eb2d5742c1e430138df290a1e8451c"
dependencies = [
"log",
"tendril",
"web_atoms",
]
[[package]]
name = "matchers"
version = "0.2.0"
@ -1747,12 +1674,14 @@ dependencies = [
"anyhow",
"chrono",
"clap",
"const_format",
"dotenvy",
"extend",
"futures",
"html-escape",
"itertools",
"lazy-regex",
"reqwest",
"scraper",
"serde",
"shared",
"sqlx",
"strum",
@ -1789,12 +1718,6 @@ dependencies = [
"windows-sys 0.61.2",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "nonzero_ext"
version = "0.3.0"
@ -1918,59 +1841,6 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "phf"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
dependencies = [
"phf_macros",
"phf_shared",
"serde",
]
[[package]]
name = "phf_codegen"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
dependencies = [
"fastrand",
"phf_shared",
]
[[package]]
name = "phf_macros"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
"syn 2.0.113",
]
[[package]]
name = "phf_shared"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
@ -2040,12 +1910,6 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "proc-macro-crate"
version = "3.4.0"
@ -2493,46 +2357,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93cecd86d6259499c844440546d02f55f3e17bd286e529e48d1f9f67e92315cb"
dependencies = [
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"precomputed-hash",
"selectors",
"tendril",
]
[[package]]
name = "seahash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "selectors"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "feef350c36147532e1b79ea5c1f3791373e61cbd9a6a2615413b3807bb164fb7"
dependencies = [
"bitflags",
"cssparser",
"derive_more",
"log",
"new_debug_unreachable",
"phf",
"phf_codegen",
"precomputed-hash",
"rustc-hash",
"servo_arc",
"smallvec",
]
[[package]]
name = "semver"
version = "1.0.27"
@ -2594,15 +2424,6 @@ dependencies = [
"serde",
]
[[package]]
name = "servo_arc"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
dependencies = [
"stable_deref_trait",
]
[[package]]
name = "sha1"
version = "0.10.6"
@ -2682,12 +2503,6 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
[[package]]
name = "siphasher"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
[[package]]
name = "slab"
version = "0.4.11"
@ -2958,31 +2773,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
[[package]]
name = "string_cache"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]
[[package]]
name = "stringprep"
version = "0.1.5"
@ -3075,17 +2865,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]]
name = "thiserror"
version = "2.0.17"
@ -3428,12 +3207,6 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
[[package]]
name = "unicode-xid"
version = "0.2.6"
@ -3459,10 +3232,10 @@ dependencies = [
]
[[package]]
name = "utf-8"
version = "0.7.6"
name = "utf8-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
checksum = "1292c0d970b54115d14f2492fe0170adf21d68a1de108eebc51c1df4f346a091"
[[package]]
name = "utf8_iter"
@ -3659,18 +3432,6 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "web_atoms"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acd0c322f146d0f8aad130ce6c187953889359584497dac6561204c8e17bb43d"
dependencies = [
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
]
[[package]]
name = "webpki-roots"
version = "0.26.11"

View File

@ -0,0 +1,42 @@
-- Down migration: remove the saturated_fats column and shrink dish_type_enum
-- back to its original three values ('main', 'side', 'dessert').
ALTER TABLE meals
DROP COLUMN saturated_fats;
-- meals_view selects dish_type. It is dropped before the column's type is
-- swapped and recreated at the end of this script, since PostgreSQL refuses to
-- change the type of a column that a view depends on.
DROP VIEW IF EXISTS meals_view;
-- 'soup' and 'other' do not exist in the old enum, so those rows are folded
-- into 'main' first. This step is lossy: the finer category is not recoverable.
UPDATE meals
SET dish_type = 'main'
WHERE dish_type IN ('soup', 'other');
-- PostgreSQL cannot remove values from an existing enum, so the type is
-- replaced: rename the current one, create the reduced one under the original
-- name, convert the column through text, then drop the renamed type.
ALTER TYPE dish_type_enum RENAME TO dish_type_enum_old;
CREATE TYPE dish_type_enum AS ENUM ('main', 'side', 'dessert');
ALTER TABLE meals
ALTER COLUMN dish_type
TYPE dish_type_enum
USING dish_type::text::dish_type_enum;
DROP TYPE dish_type_enum_old;
-- Recreate the view over the latest meal rows. It does not expose
-- saturated_fats; kcal is derived from kjoules (1 kcal = 4.184 kJ).
CREATE OR REPLACE VIEW meals_view AS
SELECT
id,
date,
canteen,
name,
dish_type,
image_src,
price_students,
price_employees,
price_guests,
vegan,
vegetarian,
kjoules,
proteins,
carbohydrates,
fats,
round(kjoules / 4.184) AS kcal
FROM meals
WHERE is_latest = TRUE;

View File

@ -0,0 +1,8 @@
-- Up migration: extend dish_type_enum with 'soup' and 'other' and add a
-- saturated_fats column to meals.
-- IF NOT EXISTS makes re-running the enum additions harmless; AFTER fixes the
-- position of each new value in the enum's sort order.
ALTER TYPE dish_type_enum ADD VALUE IF NOT EXISTS 'soup' AFTER 'side';
ALTER TYPE dish_type_enum ADD VALUE IF NOT EXISTS 'other' AFTER 'dessert';
-- No NOT NULL constraint and no default: existing rows get NULL.
ALTER TABLE meals
ADD COLUMN saturated_fats NUMERIC(6,2);

View File

@ -17,12 +17,14 @@ path = "src/bin/cli.rs"
anyhow = { workspace = true }
chrono = { workspace = true }
clap = { version = "4.5.54", features = ["derive", "env"] }
const_format = "0.2.33"
dotenvy = { workspace = true }
extend = "1.2.0"
futures = { workspace = true }
html-escape = "0.2.13"
itertools = { workspace = true }
reqwest = { version = "0.12.9", default-features = false, features = ["charset", "rustls-tls", "http2"] }
scraper = "0.25.0"
lazy-regex = "3.6.0"
reqwest = { version = "0.12.9", default-features = false, features = ["charset", "json", "rustls-tls", "http2"] }
serde = { workspace = true, features = ["derive"] }
shared = { path = "../shared" }
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "rust_decimal"] }
strum = { workspace = true, features = ["derive"] }

View File

@ -1,23 +1,15 @@
use const_format::concatcp;
use shared::Canteen;
const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/";
pub trait CanteenExt {
fn get_url(&self) -> &str;
}
impl CanteenExt for Canteen {
fn get_url(&self) -> &str {
#[extend::ext]
pub impl Canteen {
fn get_venue_id(&self) -> &'static str {
match self {
Self::Forum => concatcp!(POST_URL_BASE, "forum/"),
Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"),
Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"),
Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"),
Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"),
Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"),
Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"),
Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"),
Self::Academica => "mensa",
Self::Forum => "mensa-forum",
Self::ZM2 => "mensa-zm2",
Self::Basilica => "mensa-hamm",
Self::Atrium => "mensa-lippstadt",
Self::GrillCafe => "grill-cafe",
}
}
}

View File

@ -1,19 +1,12 @@
use std::sync::LazyLock;
use std::borrow::Cow;
use scraper::{ElementRef, Selector};
use shared::DishType;
use sqlx::types::Decimal;
use crate::util::normalize_price_bigdecimal;
static IMG_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse(".img img").expect("Failed to parse selector"));
static HTML_PRICE_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse(".desc .price").expect("Failed to parse selector"));
static HTML_EXTRAS_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse(".desc .buttons > *").expect("Failed to parse selector"));
static HTML_NUTRITIONS_SELECTOR: LazyLock<Selector> =
LazyLock::new(|| Selector::parse(".nutritions > p").expect("Failed to parse selector"));
use crate::{
menu::ResponseMeal,
util::{first_non_empty_string, normalize_price_bigdecimal},
};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Dish {
@ -34,6 +27,7 @@ pub struct NutritionValues {
pub protein: Option<Decimal>,
pub carbs: Option<Decimal>,
pub fat: Option<Decimal>,
pub saturated_fat: Option<Decimal>,
}
impl Dish {
@ -71,113 +65,6 @@ impl Dish {
&& self.vegetarian == other.vegetarian
&& self.dish_type == other.dish_type
}
pub fn from_element(
element: ElementRef,
details: ElementRef,
dish_type: DishType,
) -> Option<Self> {
let html_name_selector = Selector::parse(".desc h4").ok()?;
let name = element
.select(&html_name_selector)
.next()?
.text()
.collect::<Vec<_>>()
.join("")
.trim()
.to_string();
let img_src = element.select(&IMG_SELECTOR).next().and_then(|el| {
el.value()
.attr("src")
.map(|img_src_path| format!("https://www.studierendenwerk-pb.de/{}", img_src_path))
});
let mut prices = element
.select(&HTML_PRICE_SELECTOR)
.filter_map(|price| {
let price_for = price.first_child().and_then(|strong| {
strong.first_child().and_then(|text_element| {
text_element
.value()
.as_text()
.map(|text| text.trim().trim_end_matches(':').to_string())
})
});
let price_value = price.last_child().and_then(|text_element| {
text_element
.value()
.as_text()
.map(|text| text.trim().to_string())
});
price_for
.and_then(|price_for| price_value.map(|price_value| (price_for, price_value)))
})
.collect::<Vec<_>>();
let extras = element
.select(&HTML_EXTRAS_SELECTOR)
.filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
.collect::<Vec<_>>();
let nutritions_element = details.select(&HTML_NUTRITIONS_SELECTOR).next();
let nutrition_values = if let Some(nutritions_element) = nutritions_element {
let mut kjoule = None;
let mut protein = None;
let mut carbs = None;
let mut fat = None;
for s in nutritions_element.text() {
let s = s.trim();
if !s.is_empty() {
if let Some(rest) = s.strip_prefix("Brennwert = ") {
kjoule = rest
.split_whitespace()
.next()
.and_then(|num_str| num_str.parse().ok());
} else if let Some(rest) = s.strip_prefix("Eiweiß = ") {
protein = grams_to_bigdecimal(rest);
} else if let Some(rest) = s.strip_prefix("Kohlenhydrate = ") {
carbs = grams_to_bigdecimal(rest);
} else if let Some(rest) = s.strip_prefix("Fett = ") {
fat = grams_to_bigdecimal(rest);
}
}
}
NutritionValues {
kjoule,
protein,
carbs,
fat,
}
} else {
NutritionValues::default()
};
let vegan = extras.contains(&"vegan".to_string());
Some(Self {
name,
image_src: img_src,
price_students: prices
.iter_mut()
.find(|(price_for, _)| price_for == "Studierende")
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
price_employees: prices
.iter_mut()
.find(|(price_for, _)| price_for == "Bedienstete")
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
price_guests: prices
.iter_mut()
.find(|(price_for, _)| price_for == "Gäste")
.map(|(_, price)| price_to_bigdecimal(Some(price)))?,
vegetarian: vegan || extras.contains(&"vegetarisch".to_string()),
vegan,
dish_type,
nutrition_values: nutrition_values.normalize(),
})
}
}
impl NutritionValues {
@ -187,6 +74,7 @@ impl NutritionValues {
protein: self.protein.map(|p| p.normalize().round_dp(2)),
carbs: self.carbs.map(|c| c.normalize().round_dp(2)),
fat: self.fat.map(|f| f.normalize().round_dp(2)),
saturated_fat: self.saturated_fat.map(|sf| sf.normalize().round_dp(2)),
}
}
}
@ -197,15 +85,148 @@ impl PartialOrd for Dish {
}
}
fn price_to_bigdecimal(s: Option<&str>) -> Decimal {
s.and_then(|p| {
p.trim_end_matches("")
.replace(',', ".")
.parse::<Decimal>()
.ok()
})
.map(normalize_price_bigdecimal)
.unwrap_or_else(|| Decimal::from(99999))
/// Builds a [`Dish`] from one meal entry of the API response.
///
/// * The title has its HTML entities decoded.
/// * The image is the first non-empty URL among the JPEG variants, then the
///   WebP variants (full size, small, thumbnail in each group).
/// * Prices go through `price_to_bigdecimal`, so an unparsable price becomes
///   that function's sentinel value rather than an error.
/// * Nutrition values are normalized, matching what `check_refresh` does with
///   the values it reads back from the database. Without this, a scraped value
///   carrying more than two decimals would never compare equal to the stored one.
impl From<ResponseMeal> for Dish {
    fn from(meal: ResponseMeal) -> Self {
        // Compute the flags first: they borrow `meal`, whose fields are moved
        // out one by one below.
        let vegan = meal.is_vegan();
        let vegetarian = meal.is_vegetarian();

        Self {
            // When nothing had to be decoded the original `String` is reused
            // instead of allocating a copy of the borrowed slice.
            name: match html_escape::decode_html_entities(&meal.title) {
                Cow::Owned(o) => o,
                Cow::Borrowed(_) => meal.title,
            },
            image_src: first_non_empty_string([
                meal.image_jpeg,
                meal.image_jpeg_small,
                meal.image_jpeg_thumb,
                meal.image_webp,
                meal.image_webp_small,
                meal.image_webp_thumb,
            ]),
            price_students: price_to_bigdecimal(&meal.price_students),
            price_employees: price_to_bigdecimal(&meal.price_staff),
            price_guests: price_to_bigdecimal(&meal.price_guests),
            vegan,
            vegetarian,
            dish_type: DishType::from_category(meal.category.as_str()),
            nutrition_values: nutrition_from_str(&meal.nutrition).normalize(),
        }
    }
}
/// Parses a price string from the API into a [`Decimal`].
///
/// Surrounding whitespace is ignored and a decimal comma (`"2,50"`) is accepted
/// in addition to a decimal point. A successfully parsed value is passed
/// through `normalize_price_bigdecimal`.
///
/// Returns the sentinel `99999` when the string cannot be parsed, including
/// when it is empty.
fn price_to_bigdecimal(s: &str) -> Decimal {
    // Trim first: padding around an otherwise valid number would make the
    // parse fail and silently yield the sentinel price.
    s.trim()
        .replace(',', ".")
        .parse::<Decimal>()
        .ok()
        .map(normalize_price_bigdecimal)
        .unwrap_or_else(|| Decimal::from(99999))
}
/// Diet flags derived from the `button` text of a [`ResponseMeal`].
#[extend::ext]
impl ResponseMeal {
    /// Returns `true` when the lowercased `button` text contains `/4.png`.
    // NOTE(review): presumably icon 4 is the "vegan" badge — confirm against the API.
    fn is_vegan(&self) -> bool {
        let markup = self.button.to_lowercase();
        markup.contains("/4.png")
    }

    /// Returns `true` when the meal is vegan, or when the lowercased `button`
    /// text contains `/3.png`.
    // NOTE(review): presumably icon 3 is the "vegetarian" badge — confirm against the API.
    fn is_vegetarian(&self) -> bool {
        if self.is_vegan() {
            return true;
        }
        self.button.to_lowercase().contains("/3.png")
    }
}
/// Classification of free-text category labels into a [`DishType`].
#[extend::ext]
impl DishType {
    /// Maps a category label to a dish type.
    ///
    /// A blank label yields `Other`. Otherwise the label is lowercased and
    /// checked against `MEAL_CATEGORY_PATTERNS` in order; the first rule whose
    /// test passes decides the type, and `Other` is returned when none passes.
    fn from_category(category: &str) -> Self {
        if category.trim().is_empty() {
            return Self::Other;
        }

        let needle = category.to_lowercase();
        for rule in MEAL_CATEGORY_PATTERNS.iter() {
            if (rule.test)(&needle) {
                return rule.dish_type;
            }
        }
        DishType::Other
    }
}
/// One classification rule: a predicate over a lowercased category label and
/// the [`DishType`] assigned when the predicate holds.
struct MealCategoryPattern {
    dish_type: DishType,
    test: fn(&str) -> bool,
}

/// Category rules in priority order (soup, side, dessert, main).
///
/// The lookup takes the first rule that matches, so a label containing
/// keywords from several rules is classified by the earliest one.
// NOTE(review): "sättigungbeil" lacks the "s" of "Sättigungsbeilage". Full
// labels are still caught by "beilage", but a truncated "sättigungsbeil…"
// would not match — confirm against the categories the API actually sends.
const MEAL_CATEGORY_PATTERNS: [MealCategoryPattern; 4] = [
    MealCategoryPattern {
        test: |s| ["eintopf", "suppe"].iter().any(|kw| s.contains(*kw)),
        dish_type: DishType::Soup,
    },
    MealCategoryPattern {
        test: |s| {
            ["beilage", "sättigungbeil", "gemüsebeil"]
                .iter()
                .any(|kw| s.contains(*kw))
        },
        dish_type: DishType::Side,
    },
    MealCategoryPattern {
        test: |s| s.contains("dessert"),
        dish_type: DishType::Dessert,
    },
    MealCategoryPattern {
        test: |s| {
            [
                "fleisch",
                "fisch",
                "vegetarisch",
                "vegan",
                "aktions",
                "pasta",
                "cafeteria",
                "zwischenverpflegung",
                "restanten",
                "bona vista",
            ]
            .iter()
            .any(|kw| s.contains(*kw))
        },
        dish_type: DishType::Main,
    },
];
/// Extracts nutrition values from the API's free-text nutrition string.
///
/// Each value is looked up with its own case-insensitive pattern
/// (`Brennwert=… kJ`, `Eiweiß=…g`, `Kohlenhydrate=…g`, `Fett=…g`,
/// `davon gesättigte Fettsäuren=…g`). A value whose pattern does not match, or
/// whose number fails to convert, is left as `None`. A blank input yields
/// `NutritionValues::default()` without running any pattern.
fn nutrition_from_str(nutrition_str: &str) -> NutritionValues {
    if nutrition_str.trim().is_empty() {
        return NutritionValues::default();
    }

    let energy_re = lazy_regex::regex!(r"Brennwert=(\d+) kJ"i);
    let protein_re = lazy_regex::regex!(r"Eiweiß=(\d+(?:,\d+)?)g"i);
    let carbs_re = lazy_regex::regex!(r"Kohlenhydrate=(\d+(?:,\d+)?)g"i);
    let fat_re = lazy_regex::regex!(r"Fett=(\d+(?:,\d+)?)g"i);
    let saturated_fat_re = lazy_regex::regex!(r"davon gesättigte Fettsäuren=(\d+(?:,\d+)?)g"i);

    NutritionValues {
        // Energy is captured as a whole number and parsed directly.
        kjoule: energy_re
            .captures(nutrition_str)
            .and_then(|caps| caps.get(1))
            .and_then(|value| value.as_str().parse().ok()),
        // Gram amounts may carry a decimal comma; the helper converts them.
        protein: protein_re
            .captures(nutrition_str)
            .and_then(|caps| caps.get(1))
            .and_then(|value| grams_to_bigdecimal(value.as_str())),
        carbs: carbs_re
            .captures(nutrition_str)
            .and_then(|caps| caps.get(1))
            .and_then(|value| grams_to_bigdecimal(value.as_str())),
        fat: fat_re
            .captures(nutrition_str)
            .and_then(|caps| caps.get(1))
            .and_then(|value| grams_to_bigdecimal(value.as_str())),
        saturated_fat: saturated_fat_re
            .captures(nutrition_str)
            .and_then(|caps| caps.get(1))
            .and_then(|value| grams_to_bigdecimal(value.as_str())),
    }
}
fn grams_to_bigdecimal(s: &str) -> Option<Decimal> {

View File

@ -4,36 +4,13 @@ mod menu;
mod refresh;
pub mod util;
use std::{collections::HashSet, error::Error, fmt::Display, sync::LazyLock};
use std::{collections::HashSet, sync::LazyLock};
pub use dish::Dish;
pub use menu::scrape_menu;
pub use refresh::check_refresh;
use shared::Canteen;
#[derive(Debug, Clone)]
struct CustomError(String);
impl Display for CustomError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl Error for CustomError {}
impl From<&str> for CustomError {
fn from(s: &str) -> Self {
CustomError(s.to_string())
}
}
impl From<String> for CustomError {
fn from(s: String) -> Self {
CustomError(s)
}
}
pub static FILTER_CANTEENS: LazyLock<HashSet<Canteen>> = LazyLock::new(|| {
std::env::var("FILTER_CANTEENS")
.ok()

View File

@ -1,70 +1,96 @@
use std::sync::LazyLock;
use anyhow::Result;
use chrono::NaiveDate;
use scraper::{Html, Selector};
use shared::{Canteen, DishType};
use shared::Canteen;
use crate::{CustomError, Dish, canteen::CanteenExt as _};
use crate::{Dish, canteen::CanteenExt as _};
static HTML_MAIN_DISHES_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
Selector::parse("table.table-dishes.main-dishes > tbody").expect("Failed to parse selector")
});
static HTML_SIDE_DISHES_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
Selector::parse("table.table-dishes.side-dishes > tbody").expect("Failed to parse selector")
});
static HTML_DESSERTS_TBODY_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
Selector::parse("table.table-dishes.soups > tbody").expect("Failed to parse selector")
});
const API_URL: &str = "https://stwpb.de/wp-json/stwk-pb/v1/meals";
#[tracing::instrument]
pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>> {
pub async fn scrape_menu(
start_date: &NaiveDate,
end_date: &NaiveDate,
canteen: Canteen,
) -> Result<Vec<Dish>> {
tracing::debug!("Starting scraping");
let url = canteen.get_url();
let client = reqwest::Client::new();
let request_builder = client.post(url).query(&[(
"tx_pamensa_mensa[date]",
date.format("%Y-%m-%d").to_string(),
)]);
let request_builder = client.get(API_URL).query(&[
("venue", canteen.get_venue_id().to_string()),
("start_date", start_date.format("%Y-%m-%d").to_string()),
("end_date", end_date.format("%Y-%m-%d").to_string()),
]);
let response = request_builder.send().await?;
let html_content = response.text().await?;
let response_data = response.json::<ResponseData>().await?;
let document = scraper::Html::parse_document(&html_content);
let main_dishes = scrape_category(&document, &HTML_MAIN_DISHES_TBODY_SELECTOR, DishType::Main)?;
let side_dishes = scrape_category(&document, &HTML_SIDE_DISHES_TBODY_SELECTOR, DishType::Side)?;
let desserts = scrape_category(&document, &HTML_DESSERTS_TBODY_SELECTOR, DishType::Dessert)?;
let mut res = Vec::new();
res.extend(main_dishes);
res.extend(side_dishes);
res.extend(desserts);
let res = response_data.meals.into_iter().map(Dish::from).collect();
tracing::debug!("Finished scraping");
Ok(res)
}
static ITEM_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
Selector::parse("tr.odd > td.description > div.row").expect("Failed to parse selector")
});
static ITEM_DETAILS_SELECTOR: LazyLock<Selector> = LazyLock::new(|| {
Selector::parse("tr.even > td.more > div.ingredients-list").expect("Failed to parse selector")
});
fn scrape_category<'a>(
document: &'a Html,
tbody_selector: &Selector,
dish_type: DishType,
) -> Result<impl Iterator<Item = Dish> + 'a> {
let tbody = document.select(tbody_selector).next().ok_or_else(|| {
CustomError::from(format!("No tbody found for selector: {:?}", tbody_selector))
})?;
let dishes = tbody.select(&ITEM_SELECTOR);
let dish_details = tbody.select(&ITEM_DETAILS_SELECTOR);
Ok(dishes
.zip(dish_details)
.filter_map(move |(dish, details)| Dish::from_element(dish, details, dish_type)))
/// Top-level JSON body of the meals API response, as deserialized by
/// `scrape_menu`.
///
/// Only `meals` is read by the code shown; the other fields are deserialized
/// but unused, hence the `dead_code` expectation. Every field is required:
/// a response missing any of them fails to deserialize.
#[expect(dead_code)]
#[derive(Debug, Clone, serde::Deserialize)]
struct ResponseData {
// NOTE(review): presumably echoes the `venue` query parameter — confirm.
venue: String,
venue_name: String,
// NOTE(review): presumably echo the requested date range — confirm.
start_date: NaiveDate,
end_date: NaiveDate,
// The meal entries; each one is converted into a `Dish`.
meals: Vec<ResponseMeal>,
// NOTE(review): presumably the number of entries in `meals` — confirm.
total: usize,
}
/// One meal entry of the API response.
///
/// The conversion into `Dish` reads `title`, `category`, the three price
/// fields, `nutrition`, `button` and the image URLs. The remaining fields are
/// deserialized but unused in the code shown, hence the `dead_code`
/// expectation. Every field is required for deserialization to succeed.
#[expect(dead_code)]
#[derive(Debug, Clone, serde::Deserialize)]
pub(crate) struct ResponseMeal {
pub id: usize,
// May contain HTML entities; they are decoded when building the dish name.
pub title: String,
pub date: NaiveDate,
pub date_german: String,
// Free-text category label; mapped to a `DishType` by keyword matching.
pub category: String,
// Prices arrive as strings and are parsed later (a decimal comma is accepted).
pub price_students: String,
pub price_staff: String,
pub price_guests: String,
pub allergens_raw: String,
pub allergens_decoded: ResponseAllergensDecoded,
// Free text such as `Brennwert=… kJ`; parsed with regular expressions.
pub nutrition: String,
// Searched for `/3.png` and `/4.png` to derive the vegetarian/vegan flags.
pub button: String,
// Image URLs; an empty or blank string means "not available". The first
// non-blank one is used, JPEG variants before WebP variants.
pub image_jpeg: String,
pub image_webp: String,
pub image_jpeg_small: String,
pub image_webp_small: String,
pub image_jpeg_thumb: String,
pub image_webp_thumb: String,
}
/// The `allergens_decoded` object of a meal entry.
///
/// None of these fields is read in the code shown; they are only deserialized,
/// hence the `dead_code` expectation.
#[expect(dead_code)]
#[derive(Debug, Clone, serde::Deserialize)]
pub(crate) struct ResponseAllergensDecoded {
pub allergens: Vec<ResponseAllergen>,
pub additives: Vec<ResponseAdditive>,
// NOTE(review): presumably the undecoded allergen/additive codes — confirm.
pub raw_codes: Vec<String>,
}
/// One allergen entry inside `ResponseAllergensDecoded`.
///
/// Not read in the code shown (`dead_code` is expected). All fields are typed
/// as `String`, including `id`, `active` and `sort_order`.
// NOTE(review): presumably the API serializes those values as JSON strings;
// a JSON number there would fail to deserialize — confirm against a real response.
#[expect(dead_code)]
#[derive(Debug, Clone, serde::Deserialize)]
pub(crate) struct ResponseAllergen {
pub id: String,
pub code: String,
pub name_de: String,
pub name_en: String,
pub category: String,
pub active: String,
pub sort_order: String,
}
/// One additive entry inside `ResponseAllergensDecoded`.
///
/// Not read in the code shown (`dead_code` is expected). It has the same
/// fields as `ResponseAllergen` except that there is no `category`.
// NOTE(review): all fields are `String`; presumably the API sends `id`,
// `active` and `sort_order` as JSON strings — confirm against a real response.
#[expect(dead_code)]
#[derive(Debug, Clone, serde::Deserialize)]
pub(crate) struct ResponseAdditive {
pub id: String,
pub code: String,
pub name_de: String,
pub name_en: String,
pub active: String,
pub sort_order: String,
}

View File

@ -95,7 +95,7 @@ pub async fn check_refresh(
.collect::<HashSet<_>>();
let db_data = sqlx::query!(
r#"SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS "dish_type: DishType", kjoules, proteins, carbohydrates, fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)"#,
r#"SELECT canteen, name, image_src, price_students, price_employees, price_guests, vegetarian, vegan, dish_type AS "dish_type: DishType", kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals WHERE date = $1 AND is_latest = TRUE AND canteen = ANY($2)"#,
date,
&canteens_needing_refresh
.iter()
@ -118,6 +118,7 @@ pub async fn check_refresh(
protein: r.proteins,
carbs: r.carbohydrates,
fat: r.fats,
saturated_fat: r.saturated_fats,
}.normalize(),
}
)

View File

@ -4,9 +4,9 @@ use anyhow::Result;
use chrono::NaiveDate;
use futures::{Stream, StreamExt as _};
use shared::{Canteen, DishType};
use sqlx::{postgres::PgPoolOptions, types::Decimal, PgPool, PgTransaction};
use sqlx::{PgPool, PgTransaction, postgres::PgPoolOptions, types::Decimal};
use crate::{scrape_menu, Dish};
use crate::{Dish, scrape_menu};
pub fn get_db() -> Result<PgPool> {
Ok(PgPoolOptions::new()
@ -17,7 +17,7 @@ pub fn scrape_canteens_at_days<'a>(
date_canteen_combinations: &'a [(NaiveDate, Canteen)],
) -> impl Stream<Item = Result<(NaiveDate, Canteen, Vec<Dish>)>> + 'a {
futures::stream::iter(date_canteen_combinations).then(|(date, canteen)| async move {
scrape_menu(date, *canteen)
scrape_menu(date, date, *canteen)
.await
.map(|menu| (*date, *canteen, menu))
})
@ -30,7 +30,9 @@ pub async fn add_menu_to_db(
menu: Vec<Dish>,
) -> Result<(), sqlx::Error> {
if !menu.is_empty() {
let mut query = sqlx::QueryBuilder::new("INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian,kjoules,proteins,carbohydrates,fats) ");
let mut query = sqlx::QueryBuilder::new(
"INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian,kjoules,proteins,carbohydrates,fats) ",
);
query
.push_values(menu, |mut sep, item| {
@ -72,3 +74,7 @@ pub async fn add_menu_to_db(
/// Brings a price into canonical form: the value is normalized first and
/// then rounded to two decimal places.
pub fn normalize_price_bigdecimal(price: Decimal) -> Decimal {
    let normalized = price.normalize();
    normalized.round_dp(2)
}
/// Returns the first string that contains at least one non-whitespace
/// character, or `None` when every candidate is empty or blank.
///
/// The returned string is handed back unchanged (it is not trimmed).
pub fn first_non_empty_string(strings: impl IntoIterator<Item = String>) -> Option<String> {
    for candidate in strings {
        let is_blank = candidate.trim().is_empty();
        if !is_blank {
            return Some(candidate);
        }
    }
    None
}

View File

@ -7,8 +7,6 @@ use strum::EnumIter;
pub enum Canteen {
Forum,
Academica,
Picknick,
BonaVista,
GrillCafe,
ZM2,
Basilica,
@ -20,8 +18,6 @@ impl Canteen {
match self {
Self::Forum => "forum",
Self::Academica => "academica",
Self::Picknick => "picknick",
Self::BonaVista => "bona-vista",
Self::GrillCafe => "grillcafe",
Self::ZM2 => "zm2",
Self::Basilica => "basilica",
@ -37,8 +33,6 @@ impl FromStr for Canteen {
match s {
"forum" => Ok(Self::Forum),
"academica" => Ok(Self::Academica),
"picknick" => Ok(Self::Picknick),
"bona-vista" => Ok(Self::BonaVista),
"grillcafe" => Ok(Self::GrillCafe),
"zm2" => Ok(Self::ZM2),
"basilica" => Ok(Self::Basilica),

View File

@ -9,7 +9,9 @@ pub use canteen::Canteen;
/// Category a dish is filed under.
///
/// The `Display` impl in this file renders the variants as the lowercase
/// strings `main`, `side`, `soup`, `dessert` and `other`.
pub enum DishType {
/// Rendered as `main`.
Main,
/// Rendered as `side`.
Side,
/// Rendered as `soup`.
Soup,
/// Rendered as `dessert`.
Dessert,
/// Rendered as `other`.
Other,
}
impl Display for DishType {
@ -17,7 +19,9 @@ impl Display for DishType {
let s = match self {
Self::Main => "main",
Self::Side => "side",
Self::Soup => "soup",
Self::Dessert => "dessert",
Self::Other => "other",
};
f.write_str(s)
}

View File

@ -26,7 +26,8 @@ pub struct DishPrices {
"kjoules": 1500,
"carbohydrates": "45.5",
"proteins": "30.0",
"fats": "10.0"
"fats": "10.0",
"saturated_fats": "2.5"
})
))]
pub struct DishNutrients {
@ -34,6 +35,7 @@ pub struct DishNutrients {
pub carbohydrates: Option<Decimal>,
pub proteins: Option<Decimal>,
pub fats: Option<Decimal>,
pub saturated_fats: Option<Decimal>,
}
impl Dish {
@ -74,6 +76,7 @@ impl DishNutrients {
carbohydrates: self.carbohydrates.map(|v| v.normalize().round_dp(2)),
proteins: self.proteins.map(|v| v.normalize().round_dp(2)),
fats: self.fats.map(|v| v.normalize().round_dp(2)),
saturated_fats: self.saturated_fats.map(|v| v.normalize().round_dp(2)),
}
}
}

View File

@ -1,4 +1,4 @@
use actix_web::{get, web, HttpResponse, Responder};
use actix_web::{HttpResponse, Responder, get, web};
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use serde_json::json;
@ -39,14 +39,14 @@ async fn nutrition(
let res = if let Some(date) = query.date {
sqlx::query_as!(
DishNutrients,
r#"SELECT kjoules, proteins, carbohydrates, fats FROM meals m WHERE is_latest = TRUE AND LOWER("name") = $1 AND date = $2 LIMIT 1;"#,
r#"SELECT kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals m WHERE is_latest = TRUE AND LOWER("name") = $1 AND date = $2 LIMIT 1;"#,
dish_name.to_lowercase(),
date,
).fetch_optional(db).await
} else {
sqlx::query_as!(
DishNutrients,
r#"SELECT kjoules, proteins, carbohydrates, fats FROM meals m WHERE is_latest = TRUE AND LOWER("name") = $1 ORDER BY date DESC LIMIT 1;"#,
r#"SELECT kjoules, proteins, carbohydrates, fats, saturated_fats FROM meals m WHERE is_latest = TRUE AND LOWER("name") = $1 ORDER BY date DESC LIMIT 1;"#,
dish_name.to_lowercase(),
).fetch_optional(db).await
};

View File

@ -12,7 +12,9 @@ pub struct Menu {
date: NaiveDate,
main_dishes: Vec<Dish>,
side_dishes: Vec<Dish>,
desserts: Vec<Dish>,
soup_dishes: Vec<Dish>,
dessert_dishes: Vec<Dish>,
other_dishes: Vec<Dish>,
}
impl Menu {
@ -41,7 +43,9 @@ impl Menu {
let mut main_dishes = Vec::new();
let mut side_dishes = Vec::new();
let mut desserts = Vec::new();
let mut soup_dishes = Vec::new();
let mut dessert_dishes = Vec::new();
let mut other_dishes = Vec::new();
for row in result {
let dish = Dish {
@ -65,8 +69,12 @@ impl Menu {
main_dishes.push(dish);
} else if row.dish_type == DishType::Side {
side_dishes.push(dish);
} else if row.dish_type == DishType::Soup {
soup_dishes.push(dish);
} else if row.dish_type == DishType::Dessert {
desserts.push(dish);
dessert_dishes.push(dish);
} else if row.dish_type == DishType::Other {
other_dishes.push(dish);
}
}
@ -74,7 +82,9 @@ impl Menu {
date,
main_dishes,
side_dishes,
desserts,
soup_dishes,
dessert_dishes,
other_dishes,
})
}
@ -86,14 +96,24 @@ impl Menu {
&self.side_dishes
}
pub fn get_desserts(&self) -> &[Dish] {
&self.desserts
/// Borrows the soup dishes of this menu as a slice.
pub fn get_soup_dishes(&self) -> &[Dish] {
    self.soup_dishes.as_slice()
}
/// Borrows the dessert dishes of this menu as a slice.
pub fn get_dessert_dishes(&self) -> &[Dish] {
    self.dessert_dishes.as_slice()
}
/// Borrows the dishes filed under the "other" category as a slice.
pub fn get_other_dishes(&self) -> &[Dish] {
    self.other_dishes.as_slice()
}
pub fn merged(self, other: Self) -> Self {
let mut main_dishes = self.main_dishes;
let mut side_dishes = self.side_dishes;
let mut desserts = self.desserts;
let mut soup_dishes = self.soup_dishes;
let mut dessert_dishes = self.dessert_dishes;
let mut other_dishes = self.other_dishes;
for dish in other.main_dishes {
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
@ -109,11 +129,25 @@ impl Menu {
side_dishes.push(dish);
}
}
for dish in other.desserts {
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
for dish in other.soup_dishes {
if let Some(existing) = soup_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
desserts.push(dish);
soup_dishes.push(dish);
}
}
for dish in other.dessert_dishes {
if let Some(existing) = dessert_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
dessert_dishes.push(dish);
}
}
for dish in other.other_dishes {
if let Some(existing) = other_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
other_dishes.push(dish);
}
}
@ -121,7 +155,9 @@ impl Menu {
date: self.date,
main_dishes,
side_dishes,
desserts,
soup_dishes,
dessert_dishes,
other_dishes,
}
}
}