implement first version

This commit is contained in:
Moritz Hölting 2024-07-02 17:38:48 +02:00
commit be144628c4
8 changed files with 2887 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
/target
.env

2490
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

18
Cargo.toml Normal file
View File

@@ -0,0 +1,18 @@
[package]
name = "mensa-upb-api"
description = "A web scraper api for the canteens of the University of Paderborn"
version = "0.1.0"
edition = "2021"
[dependencies]
# Web framework serving the API endpoints.
actix-web = "4.8.0"
# Date handling for "days ahead" menu queries.
chrono = "0.4.38"
# Compile-time string concatenation for the canteen URLs.
const_format = "0.2.32"
# Loads configuration from a `.env` file at startup.
dotenvy = "0.15.7"
# Iterator helpers (`sorted`, `collect_vec`).
itertools = "0.13.0"
# HTTP client used to fetch the menu pages.
reqwest = "0.12.5"
# HTML parsing and CSS-selector queries on the fetched pages.
scraper = "0.19.0"
serde = { version = "1.0.203", features = ["derive"] }
serde_json = "1.0.120"
# `EnumIter` derive used to enumerate all canteens.
strum = { version = "0.26.3", features = ["derive"] }
# Async runtime required by actix-web and reqwest.
tokio = { version = "1.38.0", features = ["full"] }

64
src/canteen.rs Normal file
View File

@@ -0,0 +1,64 @@
use std::str::FromStr;
use const_format::concatcp;
use strum::EnumIter;
/// A canteen of the Studierendenwerk Paderborn whose menu can be scraped.
///
/// `EnumIter` allows iterating over every supported canteen; the API index
/// route uses this to advertise the supported identifiers.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter)]
pub enum Canteen {
    Forum,
    Academica,
    Picknick,
    BonaVista,
    GrillCafe,
    ZM2,
    Basilica,
    Atrium,
}
// Base URL of the Studierendenwerk Paderborn menu pages; each canteen's page
// path is appended to this at compile time via `concatcp!` in `get_url`.
const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/";
impl Canteen {
    /// Returns the full URL of this canteen's menu page.
    ///
    /// The URLs are assembled at compile time with `concatcp!`, so the result
    /// is a `&'static str` — declaring that explicitly is strictly more
    /// general than the previous elided lifetime (which tied the borrow to
    /// `&self`) and is backward compatible for all callers.
    pub fn get_url(&self) -> &'static str {
        match self {
            Self::Forum => concatcp!(POST_URL_BASE, "forum/"),
            Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"),
            Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"),
            Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"),
            Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"),
            Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"),
            Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"),
            Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"),
        }
    }

    /// Returns the stable, URL-safe identifier used in API paths.
    ///
    /// This is the inverse of the `FromStr` implementation below.
    pub fn get_identifier(&self) -> &'static str {
        match self {
            Self::Forum => "forum",
            Self::Academica => "academica",
            Self::Picknick => "picknick",
            Self::BonaVista => "bona-vista",
            Self::GrillCafe => "grillcafe",
            Self::ZM2 => "zm2",
            Self::Basilica => "basilica",
            Self::Atrium => "atrium",
        }
    }
}
impl FromStr for Canteen {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"forum" => Ok(Self::Forum),
"academica" => Ok(Self::Academica),
"picknick" => Ok(Self::Picknick),
"bona-vista" => Ok(Self::BonaVista),
"grillcafe" => Ok(Self::GrillCafe),
"zm2" => Ok(Self::ZM2),
"basilica" => Ok(Self::Basilica),
"atrium" => Ok(Self::Atrium),
invalid => Err(format!("Invalid canteen identifier: {}", invalid)),
}
}
}

125
src/dish.rs Normal file
View File

@@ -0,0 +1,125 @@
use itertools::Itertools;
use scraper::ElementRef;
use serde::{Deserialize, Serialize};
use crate::Canteen;
/// A single dish scraped from a canteen menu page.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Dish {
    // Display name of the dish (trimmed text of the `.desc h4` element).
    name: String,
    // Absolute URL of the dish image on the Studierendenwerk site.
    image_src: String,
    // Prices are kept as the scraped display strings, one per customer
    // group; `None` when that group's price is absent on the page.
    price_students: Option<String>,
    price_employees: Option<String>,
    price_guests: Option<String>,
    // Titles of the extras/attribute badges shown next to the dish.
    extras: Vec<String>,
    // Canteens offering this dish; extended by `merge` when the same dish
    // appears at several canteens. Excluded from serialization.
    #[serde(skip)]
    canteens: Vec<Canteen>,
}
impl Dish {
    /// Returns the dish name.
    pub fn get_name(&self) -> &str {
        &self.name
    }

    /// Returns the student price as scraped, if present.
    pub fn get_price_students(&self) -> Option<&str> {
        self.price_students.as_deref()
    }

    /// Returns the employee price as scraped, if present.
    pub fn get_price_employees(&self) -> Option<&str> {
        self.price_employees.as_deref()
    }

    /// Returns the guest price as scraped, if present.
    pub fn get_price_guests(&self) -> Option<&str> {
        self.price_guests.as_deref()
    }

    /// Returns the extras/attribute badge titles.
    pub fn get_extras(&self) -> &[String] {
        &self.extras
    }

    /// Returns the canteens offering this dish.
    pub fn get_canteens(&self) -> &[Canteen] {
        &self.canteens
    }

    /// Returns `true` when `other` describes the same dish: identical name,
    /// prices, and (order-independent) extras. The canteen list is
    /// deliberately ignored so the same dish offered at several canteens can
    /// be merged.
    pub fn same_as(&self, other: &Self) -> bool {
        self.name == other.name
            && self.price_employees == other.price_employees
            && self.price_guests == other.price_guests
            && self.price_students == other.price_students
            // BUG FIX: the original compared `self.extras` against itself,
            // so the extras check always passed regardless of `other`.
            && self.extras.iter().sorted().collect_vec()
                == other.extras.iter().sorted().collect_vec()
    }

    /// Merges `other` into `self` by unioning the canteen lists
    /// (sorted and deduplicated).
    pub fn merge(&mut self, other: Self) {
        self.canteens.extend(other.canteens);
        self.canteens.sort();
        self.canteens.dedup();
    }
}
impl Dish {
    /// Scrapes a `Dish` from one `div.row` element of a menu table.
    ///
    /// Returns `None` when a required part of the markup (name or image) is
    /// missing.
    pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> {
        let name_selector = scraper::Selector::parse(".desc h4").unwrap();
        let name = element
            .select(&name_selector)
            .next()?
            .text()
            .collect::<String>()
            .trim()
            .to_string();

        let image_selector = scraper::Selector::parse(".img img").unwrap();
        let image_path = element.select(&image_selector).next()?.value().attr("src")?;
        let image_src = format!("https://www.studierendenwerk-pb.de/{}", image_path);

        // Collect (customer group, price string) pairs from the price rows.
        let price_selector = scraper::Selector::parse(".desc .price").unwrap();
        let mut prices: Vec<(String, String)> = Vec::new();
        for price in element.select(&price_selector) {
            // The group label is the text node inside the leading <strong>,
            // e.g. "Studierende:"; the trailing colon is stripped.
            let group = price
                .first_child()
                .and_then(|strong| strong.first_child())
                .and_then(|node| {
                    node.value()
                        .as_text()
                        .map(|text| text.trim().trim_end_matches(':').to_string())
                });
            // The amount is the trailing text node of the price element.
            let amount = price.last_child().and_then(|node| {
                node.value().as_text().map(|text| text.trim().to_string())
            });
            if let (Some(group), Some(amount)) = (group, amount) {
                prices.push((group, amount));
            }
        }

        let extras_selector = scraper::Selector::parse(".desc .buttons > *").unwrap();
        let extras = element
            .select(&extras_selector)
            .filter_map(|extra| Some(extra.value().attr("title")?.to_string()))
            .collect();

        // Moves the price string out of `prices` for the given group, if any.
        let mut take_price = |group: &str| {
            prices
                .iter_mut()
                .find(|(label, _)| label == group)
                .map(|(_, amount)| std::mem::take(amount))
        };

        Some(Self {
            name,
            image_src,
            price_students: take_price("Studierende"),
            price_employees: take_price("Bedienstete"),
            price_guests: take_price("Gäste"),
            extras,
            canteens: vec![canteen],
        })
    }
}
impl PartialOrd for Dish {
    /// Orders dishes alphabetically by name only.
    ///
    /// NOTE(review): this is inconsistent with the derived `PartialEq`, which
    /// compares all fields — two dishes with the same name but different
    /// prices are `Some(Equal)` here while unequal under `==`, violating the
    /// `PartialOrd` contract (`a == b` iff `partial_cmp == Some(Equal)`).
    /// `Menu::new` sorts by name explicitly and does not rely on this impl;
    /// consider removing it or comparing all fields — TODO confirm intent.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.name.partial_cmp(&other.name)
    }
}

7
src/lib.rs Normal file
View File

@@ -0,0 +1,7 @@
// Internal modules; their public types are re-exported below.
mod canteen;
mod dish;
mod menu;
// Public API surface of the crate.
pub use canteen::Canteen;
pub use dish::Dish;
pub use menu::Menu;

69
src/main.rs Normal file
View File

@@ -0,0 +1,69 @@
use std::{env, io, str::FromStr};
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
use chrono::{Duration as CDuration, Utc};
use itertools::Itertools;
use mensa_upb_api::{Canteen, Menu};
use serde::{Deserialize, Serialize};
use serde_json::json;
use strum::IntoEnumIterator;
/// Entry point: loads optional `.env` configuration, then serves the API.
///
/// Configuration (environment variables):
/// - `API_INTERFACE`: bind address, default `127.0.0.1`
/// - `API_PORT`: bind port, default `8080` (non-numeric values fall back)
#[actix_web::main]
async fn main() -> io::Result<()> {
    // Best-effort: a missing `.env` file is not an error.
    if dotenvy::dotenv().is_ok() {
        println!("Loaded .env file");
    }
    // `unwrap_or_else` avoids allocating the default string eagerly
    // (clippy `or_fun_call`); the original used `unwrap_or(String)`.
    let interface = env::var("API_INTERFACE").unwrap_or_else(|_| "127.0.0.1".to_string());
    let port = env::var("API_PORT")
        .ok()
        .and_then(|p| p.parse::<u16>().ok())
        .unwrap_or(8080);
    HttpServer::new(|| App::new().service(index).service(menu_today))
        .bind((interface.as_str(), port))?
        .run()
        .await
}
#[get("/")]
async fn index() -> impl Responder {
HttpResponse::Ok().json(json!({
"version": env!("CARGO_PKG_VERSION"),
"description": env!("CARGO_PKG_DESCRIPTION"),
"supportedCanteens": Canteen::iter().map(|c| c.get_identifier().to_string()).collect_vec(),
}))
}
/// Query parameters accepted by the menu endpoint.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
struct MenuQuery {
    // Number of days ahead of today to fetch (`?d=1` = tomorrow);
    // treated as 0 (today) when absent.
    #[serde(rename = "d")]
    days_ahead: Option<u32>,
}
/// `GET /menu/{canteen}` — returns the merged menu of one or more canteens
/// (comma-separated identifiers in the path) for today, or `d` days ahead.
///
/// Responds with 400 listing the unknown identifiers when the path contains
/// an invalid canteen, and 502 when fetching the upstream pages fails.
#[get("/menu/{canteen}")]
async fn menu_today(path: web::Path<String>, query: web::Query<MenuQuery>) -> impl Responder {
    let canteens = path
        .into_inner()
        .split(',')
        .map(Canteen::from_str)
        .collect_vec();
    if canteens.iter().all(Result::is_ok) {
        let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();
        let days_ahead = query.days_ahead.unwrap_or(0);
        let date = (Utc::now() + CDuration::days(days_ahead as i64)).date_naive();
        // BUG FIX: the original `unwrap()`ed the scrape result, panicking the
        // worker on any network/upstream failure; report the error instead.
        match Menu::new(date, &canteens).await {
            Ok(menu) => HttpResponse::Ok().json(menu),
            Err(err) => HttpResponse::BadGateway().json(json!({
                "error": format!("Failed to fetch menu: {}", err),
            })),
        }
    } else {
        HttpResponse::BadRequest().json(json!({
            "error": "Invalid canteen identifier",
            "invalid": canteens.into_iter().filter_map(|c| c.err()).collect_vec()
        }))
    }
}

112
src/menu.rs Normal file
View File

@@ -0,0 +1,112 @@
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use crate::{Canteen, Dish};
/// The menu of one day, possibly merged across several canteens,
/// grouped into the three categories shown on the website.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Menu {
    main_dishes: Vec<Dish>,
    side_dishes: Vec<Dish>,
    // Scraped from the "soups" table, which holds the desserts on this site.
    desserts: Vec<Dish>,
}
impl Menu {
pub async fn new(day: NaiveDate, canteens: &[Canteen]) -> Result<Self, reqwest::Error> {
let mut main_dishes = Vec::new();
let mut side_dishes = Vec::new();
let mut desserts = Vec::new();
for canteen in canteens.iter().copied() {
let (main, side, des) = scrape_menu(canteen, day).await?;
for dish in main {
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
main_dishes.push(dish);
}
}
for dish in side {
if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
side_dishes.push(dish);
}
}
for dish in des {
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
desserts.push(dish);
}
}
}
let compare_name = |a: &Dish, b: &Dish| a.get_name().cmp(b.get_name());
main_dishes.sort_by(compare_name);
side_dishes.sort_by(compare_name);
desserts.sort_by(compare_name);
Ok(Self {
main_dishes,
side_dishes,
desserts,
})
}
pub fn get_main_dishes(&self) -> &[Dish] {
&self.main_dishes
}
pub fn get_side_dishes(&self) -> &[Dish] {
&self.side_dishes
}
pub fn get_desserts(&self) -> &[Dish] {
&self.desserts
}
}
/// Fetches and parses the menu page of `canteen` for `day`, returning the
/// `(main dishes, side dishes, desserts)` found on the page.
///
/// The date is sent via the same form field the website itself uses
/// (`tx_pamensa_mensa[date]`, formatted `YYYY-MM-DD`).
///
/// # Errors
/// Fails when the HTTP request cannot be sent or the body cannot be read.
async fn scrape_menu(
    canteen: Canteen,
    day: NaiveDate,
) -> Result<(Vec<Dish>, Vec<Dish>, Vec<Dish>), reqwest::Error> {
    let client = reqwest::Client::new();
    let response = client
        .post(canteen.get_url())
        .query(&[("tx_pamensa_mensa[date]", day.format("%Y-%m-%d").to_string())])
        .send()
        .await?;
    let html_content = response.text().await?;
    let document = scraper::Html::parse_document(&html_content);
    // The three categories share the same row structure and differ only in
    // the table class; the selector-and-collect logic was previously
    // duplicated three times and now lives in `select_dishes`.
    let main_dishes = select_dishes(&document, "main-dishes", canteen);
    let side_dishes = select_dishes(&document, "side-dishes", canteen);
    // The "soups" table holds the desserts on this site.
    let desserts = select_dishes(&document, "soups", canteen);
    Ok((main_dishes, side_dishes, desserts))
}

/// Extracts all dishes from the menu table whose class contains `table_class`.
fn select_dishes(document: &scraper::Html, table_class: &str, canteen: Canteen) -> Vec<Dish> {
    let selector = scraper::Selector::parse(&format!(
        "table.table-dishes.{} > tbody > tr.odd > td.description > div.row",
        table_class
    ))
    .unwrap();
    document
        .select(&selector)
        .filter_map(|dish| Dish::from_element(dish, canteen))
        .collect()
}