implement first version
This commit is contained in:
commit
be144628c4
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
.env
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,18 @@
|
||||||
|
[package]
|
||||||
|
name = "mensa-upb-api"
|
||||||
|
description = "A web scraper api for the canteens of the University of Paderborn"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
actix-web = "4.8.0"
|
||||||
|
chrono = "0.4.38"
|
||||||
|
const_format = "0.2.32"
|
||||||
|
dotenvy = "0.15.7"
|
||||||
|
itertools = "0.13.0"
|
||||||
|
reqwest = "0.12.5"
|
||||||
|
scraper = "0.19.0"
|
||||||
|
serde = { version = "1.0.203", features = ["derive"] }
|
||||||
|
serde_json = "1.0.120"
|
||||||
|
strum = { version = "0.26.3", features = ["derive"] }
|
||||||
|
tokio = { version = "1.38.0", features = ["full"] }
|
|
@ -0,0 +1,64 @@
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use const_format::concatcp;
|
||||||
|
use strum::EnumIter;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter)]
|
||||||
|
pub enum Canteen {
|
||||||
|
Forum,
|
||||||
|
Academica,
|
||||||
|
Picknick,
|
||||||
|
BonaVista,
|
||||||
|
GrillCafe,
|
||||||
|
ZM2,
|
||||||
|
Basilica,
|
||||||
|
Atrium,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Common prefix of every canteen's menu page; `Canteen::get_url` appends the
/// canteen-specific path segment to it.
const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/";
|
||||||
|
|
||||||
|
impl Canteen {
|
||||||
|
pub fn get_url(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Self::Forum => concatcp!(POST_URL_BASE, "forum/"),
|
||||||
|
Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"),
|
||||||
|
Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"),
|
||||||
|
Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"),
|
||||||
|
Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"),
|
||||||
|
Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"),
|
||||||
|
Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"),
|
||||||
|
Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_identifier(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Self::Forum => "forum",
|
||||||
|
Self::Academica => "academica",
|
||||||
|
Self::Picknick => "picknick",
|
||||||
|
Self::BonaVista => "bona-vista",
|
||||||
|
Self::GrillCafe => "grillcafe",
|
||||||
|
Self::ZM2 => "zm2",
|
||||||
|
Self::Basilica => "basilica",
|
||||||
|
Self::Atrium => "atrium",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for Canteen {
|
||||||
|
type Err = String;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"forum" => Ok(Self::Forum),
|
||||||
|
"academica" => Ok(Self::Academica),
|
||||||
|
"picknick" => Ok(Self::Picknick),
|
||||||
|
"bona-vista" => Ok(Self::BonaVista),
|
||||||
|
"grillcafe" => Ok(Self::GrillCafe),
|
||||||
|
"zm2" => Ok(Self::ZM2),
|
||||||
|
"basilica" => Ok(Self::Basilica),
|
||||||
|
"atrium" => Ok(Self::Atrium),
|
||||||
|
invalid => Err(format!("Invalid canteen identifier: {}", invalid)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,125 @@
|
||||||
|
use itertools::Itertools;
|
||||||
|
use scraper::ElementRef;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::Canteen;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||||
|
pub struct Dish {
|
||||||
|
name: String,
|
||||||
|
image_src: String,
|
||||||
|
price_students: Option<String>,
|
||||||
|
price_employees: Option<String>,
|
||||||
|
price_guests: Option<String>,
|
||||||
|
extras: Vec<String>,
|
||||||
|
#[serde(skip)]
|
||||||
|
canteens: Vec<Canteen>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dish {
|
||||||
|
pub fn get_name(&self) -> &str {
|
||||||
|
&self.name
|
||||||
|
}
|
||||||
|
pub fn get_price_students(&self) -> Option<&str> {
|
||||||
|
self.price_students.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_price_employees(&self) -> Option<&str> {
|
||||||
|
self.price_employees.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_price_guests(&self) -> Option<&str> {
|
||||||
|
self.price_guests.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_extras(&self) -> &[String] {
|
||||||
|
&self.extras
|
||||||
|
}
|
||||||
|
pub fn get_canteens(&self) -> &[Canteen] {
|
||||||
|
&self.canteens
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn same_as(&self, other: &Self) -> bool {
|
||||||
|
self.name == other.name
|
||||||
|
&& self.price_employees == other.price_employees
|
||||||
|
&& self.price_guests == other.price_guests
|
||||||
|
&& self.price_students == other.price_students
|
||||||
|
&& self.extras.iter().sorted().collect_vec()
|
||||||
|
== self.extras.iter().sorted().collect_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn merge(&mut self, other: Self) {
|
||||||
|
self.canteens.extend(other.canteens);
|
||||||
|
self.canteens.sort();
|
||||||
|
self.canteens.dedup();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dish {
|
||||||
|
pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> {
|
||||||
|
let html_name_selector = scraper::Selector::parse(".desc h4").unwrap();
|
||||||
|
let name = element
|
||||||
|
.select(&html_name_selector)
|
||||||
|
.next()?
|
||||||
|
.text()
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("")
|
||||||
|
.trim()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let img_selector = scraper::Selector::parse(".img img").unwrap();
|
||||||
|
let img_src_path = element.select(&img_selector).next()?.value().attr("src")?;
|
||||||
|
let img_src = format!("https://www.studierendenwerk-pb.de/{}", img_src_path);
|
||||||
|
|
||||||
|
let html_price_selector = scraper::Selector::parse(".desc .price").unwrap();
|
||||||
|
let mut prices = element
|
||||||
|
.select(&html_price_selector)
|
||||||
|
.filter_map(|price| {
|
||||||
|
let price_for = price.first_child().and_then(|strong| {
|
||||||
|
strong.first_child().and_then(|text_element| {
|
||||||
|
text_element
|
||||||
|
.value()
|
||||||
|
.as_text()
|
||||||
|
.map(|text| text.trim().trim_end_matches(':').to_string())
|
||||||
|
})
|
||||||
|
});
|
||||||
|
let price_value = price.last_child().and_then(|text_element| {
|
||||||
|
text_element
|
||||||
|
.value()
|
||||||
|
.as_text()
|
||||||
|
.map(|text| text.trim().to_string())
|
||||||
|
});
|
||||||
|
price_for
|
||||||
|
.and_then(|price_for| price_value.map(|price_value| (price_for, price_value)))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").unwrap();
|
||||||
|
let extras = element
|
||||||
|
.select(&html_extras_selector)
|
||||||
|
.filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
name,
|
||||||
|
image_src: img_src,
|
||||||
|
price_students: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Studierende")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
price_employees: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Bedienstete")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
price_guests: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Gäste")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
extras,
|
||||||
|
canteens: vec![canteen],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for Dish {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||||
|
self.name.partial_cmp(&other.name)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
mod canteen;
|
||||||
|
mod dish;
|
||||||
|
mod menu;
|
||||||
|
|
||||||
|
pub use canteen::Canteen;
|
||||||
|
pub use dish::Dish;
|
||||||
|
pub use menu::Menu;
|
|
@ -0,0 +1,69 @@
|
||||||
|
use std::{env, io, str::FromStr};
|
||||||
|
|
||||||
|
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
|
||||||
|
use chrono::{Duration as CDuration, Utc};
|
||||||
|
use itertools::Itertools;
|
||||||
|
use mensa_upb_api::{Canteen, Menu};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use serde_json::json;
|
||||||
|
use strum::IntoEnumIterator;
|
||||||
|
|
||||||
|
#[actix_web::main]
|
||||||
|
async fn main() -> io::Result<()> {
|
||||||
|
if dotenvy::dotenv().is_ok() {
|
||||||
|
println!("Loaded .env file");
|
||||||
|
}
|
||||||
|
|
||||||
|
let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
|
||||||
|
let port = env::var("API_PORT")
|
||||||
|
.ok()
|
||||||
|
.and_then(|p| p.parse::<u16>().ok())
|
||||||
|
.unwrap_or(8080);
|
||||||
|
|
||||||
|
HttpServer::new(|| App::new().service(index).service(menu_today))
|
||||||
|
.bind((interface.as_str(), port))?
|
||||||
|
.run()
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[get("/")]
|
||||||
|
async fn index() -> impl Responder {
|
||||||
|
HttpResponse::Ok().json(json!({
|
||||||
|
"version": env!("CARGO_PKG_VERSION"),
|
||||||
|
"description": env!("CARGO_PKG_DESCRIPTION"),
|
||||||
|
"supportedCanteens": Canteen::iter().map(|c| c.get_identifier().to_string()).collect_vec(),
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
|
||||||
|
struct MenuQuery {
|
||||||
|
#[serde(rename = "d")]
|
||||||
|
days_ahead: Option<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[get("/menu/{canteen}")]
|
||||||
|
async fn menu_today(path: web::Path<String>, query: web::Query<MenuQuery>) -> impl Responder {
|
||||||
|
let canteens = path
|
||||||
|
.into_inner()
|
||||||
|
.split(',')
|
||||||
|
.map(Canteen::from_str)
|
||||||
|
.collect_vec();
|
||||||
|
if canteens.iter().all(Result::is_ok) {
|
||||||
|
let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();
|
||||||
|
let days_ahead = query.days_ahead.unwrap_or(0);
|
||||||
|
|
||||||
|
let menu = Menu::new(
|
||||||
|
(Utc::now() + CDuration::days(days_ahead as i64)).date_naive(),
|
||||||
|
&canteens,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
HttpResponse::Ok().json(menu)
|
||||||
|
} else {
|
||||||
|
HttpResponse::BadRequest().json(json!({
|
||||||
|
"error": "Invalid canteen identifier",
|
||||||
|
"invalid": canteens.into_iter().filter_map(|c| c.err()).collect_vec()
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,112 @@
|
||||||
|
use chrono::NaiveDate;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::{Canteen, Dish};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct Menu {
|
||||||
|
main_dishes: Vec<Dish>,
|
||||||
|
side_dishes: Vec<Dish>,
|
||||||
|
desserts: Vec<Dish>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Menu {
|
||||||
|
pub async fn new(day: NaiveDate, canteens: &[Canteen]) -> Result<Self, reqwest::Error> {
|
||||||
|
let mut main_dishes = Vec::new();
|
||||||
|
let mut side_dishes = Vec::new();
|
||||||
|
let mut desserts = Vec::new();
|
||||||
|
|
||||||
|
for canteen in canteens.iter().copied() {
|
||||||
|
let (main, side, des) = scrape_menu(canteen, day).await?;
|
||||||
|
for dish in main {
|
||||||
|
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
|
||||||
|
existing.merge(dish);
|
||||||
|
} else {
|
||||||
|
main_dishes.push(dish);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for dish in side {
|
||||||
|
if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
|
||||||
|
existing.merge(dish);
|
||||||
|
} else {
|
||||||
|
side_dishes.push(dish);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for dish in des {
|
||||||
|
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
|
||||||
|
existing.merge(dish);
|
||||||
|
} else {
|
||||||
|
desserts.push(dish);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let compare_name = |a: &Dish, b: &Dish| a.get_name().cmp(b.get_name());
|
||||||
|
|
||||||
|
main_dishes.sort_by(compare_name);
|
||||||
|
side_dishes.sort_by(compare_name);
|
||||||
|
desserts.sort_by(compare_name);
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
main_dishes,
|
||||||
|
side_dishes,
|
||||||
|
desserts,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_main_dishes(&self) -> &[Dish] {
|
||||||
|
&self.main_dishes
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_side_dishes(&self) -> &[Dish] {
|
||||||
|
&self.side_dishes
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_desserts(&self) -> &[Dish] {
|
||||||
|
&self.desserts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn scrape_menu(
|
||||||
|
canteen: Canteen,
|
||||||
|
day: NaiveDate,
|
||||||
|
) -> Result<(Vec<Dish>, Vec<Dish>, Vec<Dish>), reqwest::Error> {
|
||||||
|
let url = canteen.get_url();
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let request_builder = client
|
||||||
|
.post(url)
|
||||||
|
.query(&[("tx_pamensa_mensa[date]", day.format("%Y-%m-%d").to_string())]);
|
||||||
|
let response = request_builder.send().await?;
|
||||||
|
let html_content = response.text().await?;
|
||||||
|
|
||||||
|
let document = scraper::Html::parse_document(&html_content);
|
||||||
|
|
||||||
|
let html_main_dishes_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let html_main_dishes = document.select(&html_main_dishes_selector);
|
||||||
|
let main_dishes = html_main_dishes
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, canteen))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_side_dishes_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let html_side_dishes = document.select(&html_side_dishes_selector);
|
||||||
|
let side_dishes = html_side_dishes
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, canteen))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_desserts_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let html_desserts = document.select(&html_desserts_selector);
|
||||||
|
let desserts = html_desserts
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, canteen))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Ok((main_dishes, side_dishes, desserts))
|
||||||
|
}
|
Loading…
Reference in New Issue