Compare commits

...

2 Commits

Author SHA1 Message Date
Moritz Hölting 94b1ffead7 combine repositories of web api and scraper 2024-11-20 21:45:30 +01:00
Moritz Hölting bc88064c82 change to use postgres db instead of scraping 2024-11-20 20:11:00 +01:00
38 changed files with 2472 additions and 1003 deletions

View File

@ -1,32 +1,4 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
**/.DS_Store
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/secrets.dev.yaml
**/values.dev.yaml
/bin
/target
LICENSE
README.md
/dev-compose.yml
.env
.gitignore

View File

@ -0,0 +1,23 @@
{
"db_name": "PostgreSQL",
"query": "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)\n VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)\n ON CONFLICT (date,canteen,name) DO NOTHING",
"describe": {
"columns": [],
"parameters": {
"Left": [
"Date",
"Text",
"Text",
"Text",
"Text",
"Numeric",
"Numeric",
"Numeric",
"Bool",
"Bool"
]
},
"nullable": []
},
"hash": "4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c"
}

View File

@ -0,0 +1,71 @@
{
"db_name": "PostgreSQL",
"query": "SELECT name, array_agg(DISTINCT canteen ORDER BY canteen) AS canteens, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian \n FROM meals WHERE date = $1 AND canteen = ANY($2) \n GROUP BY name, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian\n ORDER BY name",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "name",
"type_info": "Text"
},
{
"ordinal": 1,
"name": "canteens",
"type_info": "TextArray"
},
{
"ordinal": 2,
"name": "dish_type",
"type_info": "Text"
},
{
"ordinal": 3,
"name": "image_src",
"type_info": "Text"
},
{
"ordinal": 4,
"name": "price_students",
"type_info": "Numeric"
},
{
"ordinal": 5,
"name": "price_employees",
"type_info": "Numeric"
},
{
"ordinal": 6,
"name": "price_guests",
"type_info": "Numeric"
},
{
"ordinal": 7,
"name": "vegan",
"type_info": "Bool"
},
{
"ordinal": 8,
"name": "vegetarian",
"type_info": "Bool"
}
],
"parameters": {
"Left": [
"Date",
"TextArray"
]
},
"nullable": [
false,
null,
false,
true,
false,
false,
false,
false,
false
]
},
"hash": "b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028"
}

View File

@ -0,0 +1,29 @@
{
"db_name": "PostgreSQL",
"query": "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "date",
"type_info": "Date"
},
{
"ordinal": 1,
"name": "canteen",
"type_info": "Text"
}
],
"parameters": {
"Left": [
"Date",
"Date"
]
},
"nullable": [
false,
false
]
},
"hash": "b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23"
}

1972
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,29 +1,25 @@
[package]
name = "mensa-upb-api"
description = "A web scraper api for the canteens of the University of Paderborn"
[workspace]
members = [
"scraper",
"web-api",
]
resolver = "2"
[workspace.package]
license = "MIT"
authors = ["Moritz Hölting"]
repository = "https://github.com/moritz-hoelting/mensa-upb-api"
publish = false
readme = "README.md"
version = "0.1.1"
edition = "2021"
[dependencies]
actix-cors = "0.7.0"
actix-governor = { version = "0.5.0", features = ["log"] }
actix-web = "4.8.0"
anyhow = "1.0.86"
[workspace.dependencies]
anyhow = "1.0.93"
chrono = "0.4.38"
const_format = "0.2.32"
dotenvy = "0.15.7"
futures = "0.3.30"
itertools = "0.13.0"
reqwest = "0.12.5"
scraper = "0.19.0"
serde = { version = "1.0.203", features = ["derive"] }
serde_json = "1.0.120"
strum = { version = "0.26.3", features = ["derive"] }
tokio = { version = "1.38.0", features = ["full"] }
sqlx = "0.8.2"
strum = "0.26.3"
tokio = "1.41.1"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-subscriber = "0.3.18"

View File

@ -1,68 +0,0 @@
# syntax=docker/dockerfile:1
################################################################################
# Create a stage for building the application.
ARG RUST_VERSION=1.79.0
ARG APP_NAME=mensa-upb-api
FROM rust:${RUST_VERSION}-slim-bullseye AS build
ARG APP_NAME
WORKDIR /app
RUN apt-get update -y && \
apt-get install -y pkg-config make g++ libssl-dev
# Build the application.
# Leverage a cache mount to /usr/local/cargo/registry/
# for downloaded dependencies and a cache mount to /app/target/ for
# compiled dependencies which will speed up subsequent builds.
# Leverage a bind mount to the src directory to avoid having to copy the
# source code into the container. Once built, copy the executable to an
# output directory before the cache mounted /app/target is unmounted.
RUN --mount=type=bind,source=src,target=src \
--mount=type=bind,source=Cargo.toml,target=Cargo.toml \
--mount=type=bind,source=Cargo.lock,target=Cargo.lock \
--mount=type=cache,target=/app/target/ \
--mount=type=cache,target=/usr/local/cargo/registry/ \
<<EOF
set -e
cargo build --locked --release
cp ./target/release/$APP_NAME /bin/server
EOF
################################################################################
# Create a new stage for running the application that contains the minimal
# runtime dependencies for the application. This often uses a different base
# image from the build stage where the necessary files are copied from the build
# stage.
FROM debian:bullseye-slim AS final
# Install ca certificates
RUN apt-get update -y && \
apt-get install -y ca-certificates
# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
ARG UID=10001
RUN adduser \
--disabled-password \
--gecos "" \
--home "/nonexistent" \
--shell "/sbin/nologin" \
--no-create-home \
--uid "${UID}" \
appuser
USER appuser
# Copy the executable from the "build" stage.
COPY --from=build /bin/server /bin/
# Set the environment variable to listen on all interfaces.
ENV API_INTERFACE=0.0.0.0
# Expose the port that the application listens on.
EXPOSE 8080
# What the container should run when it is started.
CMD ["/bin/server"]

View File

@ -1,9 +1,39 @@
services:
server:
api:
build:
context: .
target: final
dockerfile: ./web-api/Dockerfile
image: mensa-upb-api:latest
ports:
- 8080:8080
environment:
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
- "RUST_LOG=none,mensa_upb_api=info"
- TZ=Europe/Berlin
depends_on:
- postgres
scraper:
build:
context: .
dockerfile: ./scraper/Dockerfile
image: mensa-upb-scraper:latest
environment:
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
- "RUST_LOG=none,mensa_upb_scraper=info"
- TZ=Europe/Berlin
depends_on:
- postgres
postgres:
container_name: postgres-mensa-upb
image: postgres:17-alpine
environment:
- POSTGRES_USER=pguser
- POSTGRES_PASSWORD=pgpass
- POSTGRES_DB=postgres
volumes:
- db:/var/lib/postgresql/data
volumes:
db:

14
dev-compose.yml Normal file
View File

@ -0,0 +1,14 @@
services:
postgres:
image: postgres:17-alpine
environment:
- POSTGRES_USER=pguser
- POSTGRES_PASSWORD=pgpass
- POSTGRES_DB=postgres
ports:
- "5432:5432"
volumes:
- db:/var/lib/postgresql/data
volumes:
db:

View File

@ -0,0 +1,3 @@
-- Down migration: drop the meals table
DROP TABLE meals;

View File

@ -0,0 +1,15 @@
-- Up migration: create the meals table (one row per date, canteen and dish name)
CREATE TABLE IF NOT EXISTS meals(
date DATE NOT NULL,
canteen TEXT NOT NULL,
name TEXT NOT NULL,
dish_type TEXT NOT NULL,
image_src TEXT,
price_students DECIMAL(5, 2) NOT NULL,
price_employees DECIMAL(5, 2) NOT NULL,
price_guests DECIMAL(5, 2) NOT NULL,
vegan BOOLEAN DEFAULT FALSE,
vegetarian BOOLEAN DEFAULT FALSE,
PRIMARY KEY (date, canteen, name)
);

View File

@ -0,0 +1,4 @@
-- Down migration: allow NULL in the vegan and vegetarian columns again
ALTER TABLE meals ALTER COLUMN vegan DROP NOT NULL;
ALTER TABLE meals ALTER COLUMN vegetarian DROP NOT NULL;

View File

@ -0,0 +1,11 @@
-- Up migration: replace NULL vegan/vegetarian values with FALSE and make both columns NOT NULL
ALTER TABLE meals
ALTER COLUMN vegan TYPE BOOLEAN USING (COALESCE(vegan, FALSE)),
ALTER COLUMN vegan SET DEFAULT FALSE,
ALTER COLUMN vegan SET NOT NULL;
ALTER TABLE meals
ALTER COLUMN vegetarian TYPE BOOLEAN USING (COALESCE(vegetarian, FALSE)),
ALTER COLUMN vegetarian SET DEFAULT FALSE,
ALTER COLUMN vegetarian SET NOT NULL

2
scraper/.dockerignore Normal file
View File

@ -0,0 +1,2 @@
.env
.gitignore

2
scraper/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
.env

26
scraper/Cargo.toml Normal file
View File

@ -0,0 +1,26 @@
[package]
name = "mensa-upb-scraper"
description = "A web scraper for the canteens of the University of Paderborn"
license.workspace = true
authors.workspace = true
repository.workspace = true
readme.workspace = true
version = "0.1.0"
edition = "2021"
publish = false
[dependencies]
anyhow = { workspace = true }
chrono = { workspace = true }
const_format = "0.2.33"
dotenvy = { workspace = true }
futures = "0.3.31"
itertools = { workspace = true }
num-bigint = "0.4.6"
reqwest = { version = "0.12.9", default-features = false, features = ["charset", "rustls-tls", "http2"] }
scraper = "0.21.0"
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
strum = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["fmt", "std", "env-filter", "registry", "json", "tracing-log"] }

28
scraper/Dockerfile Normal file
View File

@ -0,0 +1,28 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
WORKDIR /app
FROM chef AS planner
COPY . .
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-scraper --recipe-path recipe.json
FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --bin mensa-upb-scraper --release --recipe-path recipe.json
COPY . .
RUN OFFLINE=true cargo build --bin mensa-upb-scraper --release
FROM debian:bookworm-slim AS runtime
WORKDIR /app
RUN apt-get update -y && \
apt-get install -y ca-certificates cron
RUN echo "0 0 * * * /app/mensa-upb-scraper >> /var/log/cron.log 2>&1" > /etc/cron.d/mensa_upb_scraper
RUN chmod 0644 /etc/cron.d/mensa_upb_scraper
RUN crontab /etc/cron.d/mensa_upb_scraper
RUN touch /var/log/cron.log
COPY --from=builder /app/target/release/mensa-upb-scraper /app/mensa-upb-scraper
CMD env > /etc/environment && cron && tail -f /var/log/cron.log

23
scraper/compose.yml Normal file
View File

@ -0,0 +1,23 @@
services:
scraper:
build: .
image: mensa-upb-scraper:latest
environment:
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-scraper/postgres
- "RUST_LOG=none,mensa_upb_scraper=info"
- TZ=Europe/Berlin
depends_on:
- postgres
postgres:
container_name: postgres-mensa-upb-scraper
image: postgres:17-alpine
environment:
- POSTGRES_USER=pguser
- POSTGRES_PASSWORD=pgpass
- POSTGRES_DB=postgres
volumes:
- db:/var/lib/postgresql/data
volumes:
db:

View File

@ -1,12 +1,9 @@
use std::str::FromStr;
use const_format::concatcp;
use serde::{Deserialize, Serialize};
use strum::EnumIter;
#[derive(
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash, Serialize, Deserialize,
)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)]
pub enum Canteen {
Forum,
Academica,

View File

@ -1,10 +1,9 @@
use std::fmt::Display;
use itertools::Itertools;
use scraper::ElementRef;
use serde::{Deserialize, Serialize};
use crate::Canteen;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Dish {
name: String,
image_src: Option<String>,
@ -12,7 +11,7 @@ pub struct Dish {
price_employees: Option<String>,
price_guests: Option<String>,
extras: Vec<String>,
canteens: Vec<Canteen>,
dish_type: DishType,
}
impl Dish {
@ -28,11 +27,20 @@ impl Dish {
pub fn get_price_guests(&self) -> Option<&str> {
self.price_guests.as_deref()
}
pub fn get_image_src(&self) -> Option<&str> {
self.image_src.as_deref()
}
pub fn is_vegan(&self) -> bool {
self.extras.contains(&"vegan".to_string())
}
pub fn is_vegetarian(&self) -> bool {
self.extras.contains(&"vegetarian".to_string())
}
pub fn get_extras(&self) -> &[String] {
&self.extras
}
pub fn get_canteens(&self) -> &[Canteen] {
&self.canteens
pub fn get_type(&self) -> DishType {
self.dish_type
}
pub fn same_as(&self, other: &Self) -> bool {
@ -44,15 +52,7 @@ impl Dish {
== self.extras.iter().sorted().collect_vec()
}
pub fn merge(&mut self, other: Self) {
self.canteens.extend(other.canteens);
self.canteens.sort();
self.canteens.dedup();
}
}
impl Dish {
pub fn from_element(element: ElementRef, canteen: Canteen) -> Option<Self> {
pub fn from_element(element: ElementRef, dish_type: DishType) -> Option<Self> {
let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
let name = element
.select(&html_name_selector)
@ -115,7 +115,7 @@ impl Dish {
.find(|(price_for, _)| price_for == "Gäste")
.map(|(_, price)| std::mem::take(price)),
extras,
canteens: vec![canteen],
dish_type,
})
}
}
@ -125,3 +125,21 @@ impl PartialOrd for Dish {
self.name.partial_cmp(&other.name)
}
}
/// Category a scraped dish belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DishType {
    /// Dish taken from the main-dishes table.
    Main,
    /// Dish taken from the side-dishes table.
    Side,
    /// Dish stored as a dessert.
    Dessert,
}

impl Display for DishType {
    /// Writes the lowercase label of the variant: `main`, `side` or `dessert`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match *self {
            DishType::Dessert => "dessert",
            DishType::Side => "side",
            DishType::Main => "main",
        })
    }
}

View File

@ -1,14 +1,12 @@
mod cache;
mod canteen;
mod dish;
mod menu;
pub mod util;
use std::{error::Error, fmt::Display};
pub use cache::MenuCache;
pub use canteen::Canteen;
pub use dish::Dish;
pub use menu::Menu;
#[derive(Debug, Clone)]
struct CustomError(String);

65
scraper/src/main.rs Normal file
View File

@ -0,0 +1,65 @@
use std::{collections::HashSet, env};
use anyhow::Result;
use chrono::{Duration, Utc};
use itertools::Itertools as _;
use mensa_upb_scraper::{util, Canteen};
use strum::IntoEnumIterator;
/// Entry point of the scraper.
///
/// Loads an optional `.env` file, opens the database pool, applies the
/// migrations from `../migrations`, and then scrapes every (date, canteen)
/// combination for today and the following six days that is neither already
/// present in the `meals` table nor listed in `FILTER_CANTEENS`
/// (comma-separated canteen identifiers). Scraped menus are written to the
/// database.
///
/// # Errors
///
/// Returns an error if the database pool cannot be created, the migrations
/// fail, or the query for already scraped entries fails.
#[tokio::main]
async fn main() -> Result<()> {
    dotenvy::dotenv().ok();

    let db = util::get_db()?;

    tracing_subscriber::fmt::init();

    sqlx::migrate!("../migrations").run(&db).await?;

    tracing::info!("Starting up...");

    // Sample the clock exactly once so that the queried window and the dates
    // that get scraped cannot disagree when the run crosses midnight.
    // NOTE(review): the date is taken in UTC while the compose files set
    // TZ=Europe/Berlin — confirm that UTC is the intended reference.
    let today = Utc::now().date_naive();
    let start_date = today;
    let end_date = today + Duration::days(6);

    let already_scraped = sqlx::query!(
        "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
        start_date,
        end_date
    )
    .fetch_all(&db)
    .await?
    .into_iter()
    .filter_map(|r| match r.canteen.parse::<Canteen>() {
        Ok(canteen) => Some((r.date, canteen)),
        Err(_) => {
            // A row with an unknown canteen must not abort the whole run;
            // it can never match a combination we want to scrape anyway.
            tracing::warn!("Ignoring db entry with unknown canteen {:?}", r.canteen);
            None
        }
    })
    .collect::<HashSet<_>>();

    let filter_canteens = env::var("FILTER_CANTEENS")
        .ok()
        .map(|s| {
            s.split(',')
                // Tolerate whitespace around the separators ("forum, zm2").
                .filter_map(|el| el.trim().parse::<Canteen>().ok())
                .collect::<HashSet<_>>()
        })
        .unwrap_or_default();

    let date_canteen_combinations = (0..7)
        .map(|d| today + Duration::days(d))
        .cartesian_product(Canteen::iter())
        .filter(|entry| !filter_canteens.contains(&entry.1) && !already_scraped.contains(entry))
        .collect::<Vec<_>>();

    util::async_for_each(&date_canteen_combinations, |(date, canteen, menu)| {
        let db = db.clone();
        async move {
            util::add_menu_to_db(&db, &date, canteen, menu).await;
        }
    })
    .await;

    tracing::info!("Finished scraping menu");

    Ok(())
}

56
scraper/src/menu.rs Normal file
View File

@ -0,0 +1,56 @@
use anyhow::Result;
use chrono::NaiveDate;
use crate::{dish::DishType, Canteen, CustomError, Dish};
#[tracing::instrument]
pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>> {
tracing::debug!("Starting scraping");
let url = canteen.get_url();
let client = reqwest::Client::new();
let request_builder = client.post(url).query(&[(
"tx_pamensa_mensa[date]",
date.format("%Y-%m-%d").to_string(),
)]);
let response = request_builder.send().await?;
let html_content = response.text().await?;
let document = scraper::Html::parse_document(&html_content);
let html_main_dishes_selector = scraper::Selector::parse(
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_main_dishes = document.select(&html_main_dishes_selector);
let main_dishes = html_main_dishes
.filter_map(|dish| Dish::from_element(dish, DishType::Main))
.collect::<Vec<_>>();
let html_side_dishes_selector = scraper::Selector::parse(
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_side_dishes = document.select(&html_side_dishes_selector);
let side_dishes = html_side_dishes
.filter_map(|dish| Dish::from_element(dish, DishType::Side))
.collect::<Vec<_>>();
let html_desserts_selector = scraper::Selector::parse(
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_desserts = document.select(&html_desserts_selector);
let desserts = html_desserts
.filter_map(|dish| Dish::from_element(dish, DishType::Dessert))
.collect::<Vec<_>>();
let mut res = Vec::new();
res.extend(main_dishes);
res.extend(side_dishes);
res.extend(desserts);
tracing::debug!("Finished scraping");
Ok(res)
}

64
scraper/src/util.rs Normal file
View File

@ -0,0 +1,64 @@
use std::{env, future::Future};
use anyhow::Result;
use chrono::NaiveDate;
use futures::StreamExt as _;
use num_bigint::BigInt;
use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool};
use crate::{menu::scrape_menu, Canteen, Dish};
pub async fn async_for_each<F, Fut>(date_canteen_combinations: &[(NaiveDate, Canteen)], f: F)
where
F: FnMut((NaiveDate, Canteen, Vec<Dish>)) -> Fut,
Fut: Future<Output = ()>,
{
futures::stream::iter(date_canteen_combinations)
.then(|(date, canteen)| async move { (*date, *canteen, scrape_menu(date, *canteen).await) })
.filter_map(|(date, canteen, menu)| async move { menu.ok().map(|menu| (date, canteen, menu)) })
.for_each(f)
.await;
}
pub fn get_db() -> Result<PgPool> {
Ok(PgPoolOptions::new()
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?)
}
/// Inserts a single dish for `date` and `canteen` into the `meals` table.
///
/// A row that already exists for the same (date, canteen, name) is left
/// untouched (`ON CONFLICT ... DO NOTHING`). Prices are converted with
/// `price_to_bigdecimal`.
///
/// # Errors
///
/// Returns the database error if the insert fails; the error is also logged.
#[tracing::instrument(skip(db))]
pub async fn add_meal_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, dish: &Dish) -> Result<()> {
// A vegan dish is also stored as vegetarian (see the last query argument).
let vegan = dish.is_vegan();
// NOTE: the query text is left untouched on purpose — presumably the offline
// query metadata is keyed by the exact string; verify before reformatting it.
sqlx::query!(
"INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
ON CONFLICT (date,canteen,name) DO NOTHING",
date, canteen.get_identifier(), dish.get_name(),
dish.get_type().to_string(), dish.get_image_src(),
price_to_bigdecimal(dish.get_price_students()),
price_to_bigdecimal(dish.get_price_employees()),
price_to_bigdecimal(dish.get_price_guests()),
vegan, vegan || dish.is_vegetarian()
).execute(db).await.inspect_err(|e| {
tracing::error!("error during database insert: {}", e);
})?;
tracing::trace!("Insert to DB successfull");
Ok(())
}
/// Stores every dish of `menu` for the given `date` and `canteen`.
///
/// Dishes are inserted one after another. Dishes with an empty name are
/// skipped, and a failed insert does not stop the remaining dishes from being
/// processed.
pub async fn add_menu_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, menu: Vec<Dish>) {
    for dish in menu {
        if dish.get_name().is_empty() {
            continue;
        }
        // The result is discarded on purpose: one failing dish must not abort the rest.
        add_meal_to_db(db, date, canteen, &dish).await.ok();
    }
}
/// Converts a scraped price such as `"3,50 €"` into a `BigDecimal`.
///
/// Surrounding whitespace and a trailing euro sign are removed and the decimal
/// comma is replaced by a dot before parsing. Trimming an empty pattern, as
/// before, removed nothing, so any price carrying a currency suffix failed to
/// parse.
///
/// If `s` is `None` or cannot be parsed, the placeholder value `999.99` is
/// returned.
pub fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
    s.and_then(|p| {
        p.trim()
            .trim_end_matches('€')
            .trim_end()
            .replace(',', ".")
            .parse()
            .ok()
    })
    .unwrap_or_else(|| BigDecimal::new(BigInt::from(99999), 2))
}

View File

@ -1,64 +0,0 @@
use std::{collections::HashMap, sync::Arc};
use chrono::{NaiveDate, Utc};
use futures::StreamExt;
use itertools::Itertools;
use tokio::sync::RwLock;
use tracing::{debug, instrument};
use crate::{Canteen, Menu};
/// Shared in-memory cache of scraped menus, keyed by date and canteen.
///
/// The map sits behind `Arc<RwLock<..>>`, so clones of a `MenuCache` refer to
/// the same underlying map.
#[derive(Debug, Clone, Default)]
pub struct MenuCache {
// Cached menu per (date, canteen) pair.
cache: Arc<RwLock<HashMap<(NaiveDate, Canteen), Menu>>>,
}
impl MenuCache {
    /// Returns the menus of all `canteens` on `date`, merged into one menu.
    ///
    /// Canteens whose menu cannot be obtained are left out of the result.
    pub async fn get_combined(&self, canteens: &[Canteen], date: NaiveDate) -> Menu {
        futures::stream::iter(canteens)
            .then(|canteen| async move { self.get(*canteen, date).await })
            .filter_map(|c| async { c })
            .fold(Menu::default(), |a, b| async move { a.merged(b) })
            .await
    }

    /// Returns the menu of `canteen` on `date`, fetching and caching it if it
    /// is not cached yet.
    ///
    /// Returns `None` only if the menu is not cached and fetching it fails.
    #[instrument(skip(self))]
    pub async fn get(&self, canteen: Canteen, date: NaiveDate) -> Option<Menu> {
        let query = (date, canteen);

        // Copy the entry out while holding the read lock once. Previously the
        // presence check and the read were separate, so an entry evicted by
        // `clean_outdated` in between made this function return `None`
        // instead of a menu.
        let (cached, is_cache_too_large) = {
            let cache = self.cache.read().await;
            (cache.get(&query).cloned(), cache.len() > 100)
        };

        if is_cache_too_large {
            self.clean_outdated().await;
        }

        if let Some(menu) = cached {
            return Some(menu);
        }

        debug!("Not in cache, fetching from network");

        let menu = Menu::new(date, canteen).await.ok()?;

        self.cache.write().await.insert(query, menu.clone());

        Some(menu)
    }

    /// Removes all cache entries whose date lies before today (UTC).
    pub async fn clean_outdated(&self) {
        let today = Utc::now().date_naive();

        // A single pass under the write lock; no window between collecting
        // the outdated keys and removing them.
        self.cache
            .write()
            .await
            .retain(|(date, _), _| *date >= today);
    }
}

View File

@ -1,132 +0,0 @@
use std::{env, io, str::FromStr};
use actix_cors::Cors;
use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{get, web, App, HttpResponse, HttpServer, Responder};
use chrono::{Duration as CDuration, Utc};
use itertools::Itertools;
use mensa_upb_api::{Canteen, MenuCache};
use serde::{Deserialize, Serialize};
use serde_json::json;
use strum::IntoEnumIterator;
use tracing::{debug, error, info, level_filters::LevelFilter};
use tracing_subscriber::EnvFilter;
#[tokio::main]
async fn main() -> io::Result<()> {
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::WARN.into())
.from_env()
.expect("Invalid filter")
.add_directive("mensa_upb_api=debug".parse().unwrap());
tracing_subscriber::fmt().with_env_filter(env_filter).init();
match dotenvy::dotenv() {
Ok(_) => debug!("Loaded .env file"),
Err(dotenvy::Error::LineParse(..)) => error!("Malformed .env file"),
Err(_) => {}
}
let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
let port = env::var("API_PORT")
.ok()
.and_then(|p| p.parse::<u16>().ok())
.unwrap_or(8080);
let seconds_replenish = env::var("API_RATE_LIMIT_SECONDS")
.ok()
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(5);
let burst_size = env::var("API_RATE_LIMIT_BURST")
.ok()
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(5);
let allowed_cors = env::var("API_CORS_ALLOWED")
.map(|val| {
val.split(',')
.map(|domain| domain.trim().to_string())
.collect_vec()
})
.ok()
.unwrap_or_default();
let governor_conf = GovernorConfigBuilder::default()
.per_second(seconds_replenish)
.burst_size(burst_size)
.finish()
.unwrap();
let menu_cache = MenuCache::default();
info!("Starting server on {}:{}", interface, port);
HttpServer::new(move || {
let cors = allowed_cors
.iter()
.fold(Cors::default(), |cors, domain| cors.allowed_origin(domain))
.send_wildcard()
.allow_any_method()
.allow_any_header()
.max_age(3600);
App::new()
.wrap(Governor::new(&governor_conf))
.wrap(cors)
.app_data(web::Data::new(menu_cache.clone()))
.service(index)
.service(menu_today)
})
.bind((interface.as_str(), port))?
.run()
.await
}
#[get("/")]
async fn index() -> impl Responder {
HttpResponse::Ok().json(json!({
"version": env!("CARGO_PKG_VERSION"),
"description": env!("CARGO_PKG_DESCRIPTION"),
"supportedCanteens": Canteen::iter().map(|c| c.get_identifier().to_string()).collect_vec(),
}))
}
/// Query parameters accepted by the menu endpoint.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
struct MenuQuery {
// Passed as `d` in the query string. Kept as a raw string here; the handler
// parses it as a number of days and reports unparsable values itself.
#[serde(rename = "d")]
days_ahead: Option<String>,
}
/// `GET /menu/{canteen}` — returns the combined menu of one or more canteens.
///
/// `{canteen}` is a comma-separated list of canteen identifiers. The optional
/// query parameter `d` selects the day as an offset in days from today (UTC);
/// it defaults to `0`.
///
/// Responds with `400 Bad Request` if any identifier is invalid or if `d` is
/// not an integer or is so large that the resulting date cannot be
/// represented. The latter case previously caused a panic inside the handler.
#[get("/menu/{canteen}")]
async fn menu_today(
    cache: web::Data<MenuCache>,
    path: web::Path<String>,
    query: web::Query<MenuQuery>,
) -> impl Responder {
    let canteens = path
        .into_inner()
        .split(',')
        .map(Canteen::from_str)
        .collect_vec();

    if canteens.iter().any(Result::is_err) {
        return HttpResponse::BadRequest().json(json!({
            "error": "Invalid canteen identifier",
            "invalid": canteens.into_iter().filter_map(|c| c.err()).collect_vec()
        }));
    }
    let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();

    // Use the checked variants throughout: the offset is user input and must
    // never be able to trigger an overflow panic.
    let date = query
        .days_ahead
        .as_deref()
        .map_or(Some(0), |d| d.parse::<i64>().ok())
        .and_then(CDuration::try_days)
        .and_then(|offset| Utc::now().checked_add_signed(offset))
        .map(|date_time| date_time.date_naive());

    let Some(date) = date else {
        return HttpResponse::BadRequest().json(json!({
            "error": "Invalid days query"
        }));
    };

    let menu = cache.get_combined(&canteens, date).await;

    HttpResponse::Ok().json(menu)
}

View File

@ -1,113 +0,0 @@
use anyhow::Result;
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use crate::{Canteen, CustomError, Dish};
/// Menu of one or more canteens, split into three dish categories.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Menu {
// Dishes scraped from the main-dishes table.
main_dishes: Vec<Dish>,
// Dishes scraped from the side-dishes table.
side_dishes: Vec<Dish>,
// Dishes scraped from the table with class `soups`.
desserts: Vec<Dish>,
}
impl Menu {
    /// Scrapes the menu of `canteen` for `day`.
    ///
    /// # Errors
    ///
    /// Returns an error if scraping fails.
    pub async fn new(day: NaiveDate, canteen: Canteen) -> Result<Self> {
        scrape_menu(canteen, day).await
    }

    /// Returns the main dishes.
    pub fn get_main_dishes(&self) -> &[Dish] {
        &self.main_dishes
    }

    /// Returns the side dishes.
    pub fn get_side_dishes(&self) -> &[Dish] {
        &self.side_dishes
    }

    /// Returns the desserts.
    pub fn get_desserts(&self) -> &[Dish] {
        &self.desserts
    }

    /// Combines two menus into one.
    ///
    /// Within each category, a dish of `other` that is the same as a dish
    /// already present (per `Dish::same_as`) is merged into it; all other
    /// dishes are appended. Each category is sorted by dish name afterwards.
    pub fn merged(self, other: Self) -> Self {
        Self {
            main_dishes: merge_dishes(self.main_dishes, other.main_dishes),
            side_dishes: merge_dishes(self.side_dishes, other.side_dishes),
            desserts: merge_dishes(self.desserts, other.desserts),
        }
    }
}

/// Merges `additional` into `dishes`, combining dishes that are the same and
/// returning the result sorted by name.
fn merge_dishes(mut dishes: Vec<Dish>, additional: Vec<Dish>) -> Vec<Dish> {
    for dish in additional {
        if let Some(existing) = dishes.iter_mut().find(|d| dish.same_as(d)) {
            existing.merge(dish);
        } else {
            dishes.push(dish);
        }
    }

    dishes.sort_by(|a, b| a.get_name().cmp(b.get_name()));

    dishes
}
async fn scrape_menu(canteen: Canteen, day: NaiveDate) -> Result<Menu> {
let url = canteen.get_url();
let client = reqwest::Client::new();
let request_builder = client
.post(url)
.query(&[("tx_pamensa_mensa[date]", day.format("%Y-%m-%d").to_string())]);
let response = request_builder.send().await?;
let html_content = response.text().await?;
let document = scraper::Html::parse_document(&html_content);
let html_main_dishes_selector = scraper::Selector::parse(
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_main_dishes = document.select(&html_main_dishes_selector);
let main_dishes = html_main_dishes
.filter_map(|dish| Dish::from_element(dish, canteen))
.collect::<Vec<_>>();
let html_side_dishes_selector = scraper::Selector::parse(
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_side_dishes = document.select(&html_side_dishes_selector);
let side_dishes = html_side_dishes
.filter_map(|dish| Dish::from_element(dish, canteen))
.collect::<Vec<_>>();
let html_desserts_selector = scraper::Selector::parse(
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
)
.map_err(|_| CustomError::from("Failed to parse selector"))?;
let html_desserts = document.select(&html_desserts_selector);
let desserts = html_desserts
.filter_map(|dish| Dish::from_element(dish, canteen))
.collect::<Vec<_>>();
Ok(Menu {
main_dishes,
side_dishes,
desserts,
})
}

32
web-api/.dockerignore Normal file
View File

@ -0,0 +1,32 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
**/.DS_Store
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/secrets.dev.yaml
**/values.dev.yaml
/bin
/target
LICENSE
README.md

27
web-api/Cargo.toml Normal file
View File

@ -0,0 +1,27 @@
[package]
name = "mensa-upb-api"
description = "A web api for a local database of the canteens of the University of Paderborn"
license.workspace = true
authors.workspace = true
repository.workspace = true
readme.workspace = true
version = "0.2.0"
edition = "2021"
publish = false
[dependencies]
actix-cors = "0.7.0"
actix-governor = { version = "0.7.0", features = ["log"] }
actix-web = "4.9.0"
anyhow = { workspace = true }
bigdecimal = { version = "0.4.6", features = ["serde"] }
chrono = { workspace = true, features = ["serde"] }
dotenvy = { workspace = true }
itertools = { workspace = true }
serde = { version = "1.0.215", features = ["derive"] }
serde_json = "1.0.133"
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
strum = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tracing = "0.1.40"
tracing-subscriber = { workspace = true, features = ["env-filter"] }

36
web-api/Dockerfile Normal file
View File

@ -0,0 +1,36 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
WORKDIR /app
FROM chef AS planner
COPY . .
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-api --recipe-path recipe.json
FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --bin mensa-upb-api --release --recipe-path recipe.json
COPY . .
RUN OFFLINE=true cargo build --bin mensa-upb-api --release
FROM debian:bookworm-slim AS runtime
ARG UID=10001
RUN adduser \
--disabled-password \
--gecos "" \
--home "/nonexistent" \
--shell "/sbin/nologin" \
--no-create-home \
--uid "${UID}" \
appuser
USER appuser
COPY --from=builder /app/target/release/mensa-upb-api /bin/mensa-upb-api
ENV API_INTERFACE=0.0.0.0
EXPOSE 8080
# What the container should run when it is started.
CMD ["/bin/mensa-upb-api"]

27
web-api/compose.yml Normal file
View File

@ -0,0 +1,27 @@
services:
api:
build: .
image: mensa-upb-api:latest
ports:
- 8080:8080
environment:
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-api/postgres
- "RUST_LOG=none,mensa_upb_api=info"
- TZ=Europe/Berlin
depends_on:
- postgres
postgres:
container_name: postgres-mensa-upb-api
image: postgres:17-alpine
environment:
- POSTGRES_USER=pguser
- POSTGRES_PASSWORD=pgpass
- POSTGRES_DB=postgres
volumes:
- db:/var/lib/postgresql/data
volumes:
db:

51
web-api/src/canteen.rs Normal file
View File

@ -0,0 +1,51 @@
use std::str::FromStr;
use serde::{Deserialize, Serialize};
use strum::EnumIter;
/// Canteens known to the web API.
///
/// Every variant has a string identifier, returned by `get_identifier` and
/// accepted by the `FromStr` implementation.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash, Serialize, Deserialize,
)]
pub enum Canteen {
Forum,
Academica,
Picknick,
BonaVista,
GrillCafe,
ZM2,
Basilica,
Atrium,
}
impl Canteen {
    /// Returns the string identifier of the canteen.
    ///
    /// The identifier is a string literal, so it is returned as `&'static str`
    /// and may outlive the borrow of `self`.
    pub fn get_identifier(&self) -> &'static str {
        match self {
            Self::Forum => "forum",
            Self::Academica => "academica",
            Self::Picknick => "picknick",
            Self::BonaVista => "bona-vista",
            Self::GrillCafe => "grillcafe",
            Self::ZM2 => "zm2",
            Self::Basilica => "basilica",
            Self::Atrium => "atrium",
        }
    }
}
impl FromStr for Canteen {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"forum" => Ok(Self::Forum),
"academica" => Ok(Self::Academica),
"picknick" => Ok(Self::Picknick),
"bona-vista" => Ok(Self::BonaVista),
"grillcafe" => Ok(Self::GrillCafe),
"zm2" => Ok(Self::ZM2),
"basilica" => Ok(Self::Basilica),
"atrium" => Ok(Self::Atrium),
invalid => Err(format!("Invalid canteen identifier: {}", invalid)),
}
}
}

42
web-api/src/dish.rs Normal file
View File

@ -0,0 +1,42 @@
use bigdecimal::BigDecimal;
use serde::{Deserialize, Serialize};
use crate::Canteen;
/// A single dish as returned by the API.
///
/// The derived `PartialEq` compares every field. Use `Dish::same_as` to
/// compare only name, prices and dietary flags.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Dish {
    /// Name of the dish.
    pub name: String,
    /// Image source for the dish, if one is known.
    pub image_src: Option<String>,
    /// Prices for the different customer groups.
    pub price: DishPrices,
    /// Whether the dish is flagged as vegetarian.
    pub vegetarian: bool,
    /// Whether the dish is flagged as vegan.
    pub vegan: bool,
    /// Canteens that offer this dish.
    pub canteens: Vec<Canteen>,
}
/// Prices of a dish, one per customer group.
// NOTE(review): the currency is not visible here; presumably euros, confirm.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DishPrices {
    /// Price charged to students.
    pub students: BigDecimal,
    /// Price charged to employees.
    pub employees: BigDecimal,
    /// Price charged to guests.
    pub guests: BigDecimal,
}
impl Dish {
    /// Tells whether `other` describes the same dish as `self`.
    ///
    /// Only the name, the prices and the vegan/vegetarian flags are compared;
    /// `image_src` and `canteens` are deliberately left out of the comparison.
    pub fn same_as(&self, other: &Self) -> bool {
        (&self.name, &self.price, self.vegan, self.vegetarian)
            == (&other.name, &other.price, other.vegan, other.vegetarian)
    }

    /// Adds the canteens of `other` to this dish.
    ///
    /// Afterwards `canteens` is sorted and free of duplicates. All other
    /// fields of `other` are discarded.
    pub fn merge(&mut self, other: Self) {
        let mut incoming = other.canteens;
        self.canteens.append(&mut incoming);
        self.canteens.sort();
        self.canteens.dedup();
    }
}
impl PartialOrd for Dish {
    /// Orders dishes by their name only.
    ///
    /// Note that this is coarser than the derived `PartialEq`: two dishes
    /// with the same name but different prices or flags compare as
    /// `Ordering::Equal` here while not being `==`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let by_name = self.name.cmp(&other.name);
        Some(by_name)
    }
}

View File

@ -0,0 +1,49 @@
use std::str::FromStr as _;
use actix_web::{get, web, HttpResponse, Responder};
use chrono::NaiveDate;
use itertools::Itertools as _;
use serde::{Deserialize, Serialize};
use serde_json::json;
use sqlx::PgPool;
use crate::{Canteen, Menu};
/// Query-string parameters accepted by the `/menu/{canteen}` endpoint.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
struct MenuQuery {
    /// Day to return the menu for; the handler falls back to the current
    /// local date when this is absent.
    date: Option<NaiveDate>,
}
#[get("/menu/{canteen}")]
async fn menu(
path: web::Path<String>,
query: web::Query<MenuQuery>,
db: web::Data<PgPool>,
) -> impl Responder {
let canteens = path
.into_inner()
.split(',')
.map(Canteen::from_str)
.collect_vec();
if canteens.iter().all(Result::is_ok) {
let canteens = canteens.into_iter().filter_map(Result::ok).collect_vec();
let date = query
.date
.unwrap_or_else(|| chrono::Local::now().date_naive());
let menu = Menu::query(&db, date, &canteens).await;
if let Ok(menu) = menu {
HttpResponse::Ok().json(menu)
} else {
HttpResponse::InternalServerError().json(json!({
"error": "Failed to query database",
}))
}
} else {
HttpResponse::BadRequest().json(json!({
"error": "Invalid canteen identifier",
"invalid": canteens.into_iter().filter_map(|c| c.err()).collect_vec()
}))
}
}

View File

@ -0,0 +1,22 @@
use actix_web::{get, web::ServiceConfig, HttpResponse, Responder};
use itertools::Itertools as _;
use serde_json::json;
use strum::IntoEnumIterator as _;
use crate::Canteen;
mod menu;
/// Registers all HTTP endpoints of the API on the given service
/// configuration: the index route and the menu route.
pub fn configure(cfg: &mut ServiceConfig) {
    cfg.service(index).service(menu::menu);
}
#[get("/")]
async fn index() -> impl Responder {
HttpResponse::Ok().json(json!({
"version": env!("CARGO_PKG_VERSION"),
"description": env!("CARGO_PKG_DESCRIPTION"),
"supportedCanteens": Canteen::iter().map(|c| c.get_identifier().to_string()).collect_vec(),
}))
}

33
web-api/src/lib.rs Normal file
View File

@ -0,0 +1,33 @@
mod canteen;
mod dish;
pub mod endpoints;
mod menu;
use std::{error::Error, fmt::Display};
pub use canteen::Canteen;
pub use dish::{Dish, DishPrices};
pub use menu::Menu;
/// Minimal error type that carries nothing but a human-readable message.
///
/// It can be created from both `&str` and `String` and displays as the
/// wrapped message.
#[derive(Debug, Clone)]
struct CustomError(String);

impl Display for CustomError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message) = self;
        f.write_str(message)
    }
}

impl Error for CustomError {}

impl From<&str> for CustomError {
    /// Copies the message into a new error.
    fn from(s: &str) -> Self {
        Self(String::from(s))
    }
}

impl From<String> for CustomError {
    /// Wraps the message without copying it.
    fn from(s: String) -> Self {
        Self(s)
    }
}

82
web-api/src/main.rs Normal file
View File

@ -0,0 +1,82 @@
use std::env;
use actix_cors::Cors;
use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{web, App, HttpServer};
use anyhow::Result;
use itertools::Itertools;
use sqlx::postgres::PgPoolOptions;
use tracing::{debug, error, info, level_filters::LevelFilter};
use tracing_subscriber::EnvFilter;
#[tokio::main]
async fn main() -> Result<()> {
let env_filter = EnvFilter::builder()
.with_default_directive(LevelFilter::WARN.into())
.from_env()
.expect("Invalid filter")
.add_directive("mensa_upb_api=debug".parse().unwrap());
tracing_subscriber::fmt().with_env_filter(env_filter).init();
match dotenvy::dotenv() {
Ok(_) => debug!("Loaded .env file"),
Err(dotenvy::Error::LineParse(..)) => error!("Malformed .env file"),
Err(_) => {}
}
let db = PgPoolOptions::new()
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?;
sqlx::migrate!("../migrations").run(&db).await?;
let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
let port = env::var("API_PORT")
.ok()
.and_then(|p| p.parse::<u16>().ok())
.unwrap_or(8080);
let seconds_replenish = env::var("API_RATE_LIMIT_SECONDS")
.ok()
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(5);
let burst_size = env::var("API_RATE_LIMIT_BURST")
.ok()
.and_then(|s| s.parse::<u32>().ok())
.unwrap_or(5);
let allowed_cors = env::var("API_CORS_ALLOWED")
.map(|val| {
val.split(',')
.map(|domain| domain.trim().to_string())
.collect_vec()
})
.ok()
.unwrap_or_default();
let governor_conf = GovernorConfigBuilder::default()
.seconds_per_request(seconds_replenish)
.burst_size(burst_size)
.finish()
.unwrap();
info!("Starting server on {}:{}", interface, port);
HttpServer::new(move || {
let cors = allowed_cors
.iter()
.fold(Cors::default(), |cors, domain| cors.allowed_origin(domain))
.send_wildcard()
.allow_any_method()
.allow_any_header()
.max_age(3600);
App::new()
.wrap(Governor::new(&governor_conf))
.wrap(cors)
.app_data(web::Data::new(db.clone()))
.configure(mensa_upb_api::endpoints::configure)
})
.bind((interface.as_str(), port))?
.run()
.await?;
Ok(())
}

116
web-api/src/menu.rs Normal file
View File

@ -0,0 +1,116 @@
use std::str::FromStr as _;
use chrono::NaiveDate;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use crate::{Canteen, Dish, DishPrices};
/// The dishes offered on a single day, grouped by dish type.
///
/// Fields are private; read them through the `get_*` accessors.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Menu {
    /// Day this menu applies to.
    date: NaiveDate,
    /// Dishes stored with dish type `"main"`.
    main_dishes: Vec<Dish>,
    /// Dishes stored with dish type `"side"`.
    side_dishes: Vec<Dish>,
    /// Dishes stored with dish type `"dessert"`.
    desserts: Vec<Dish>,
}
impl Menu {
pub async fn query(db: &PgPool, date: NaiveDate, canteens: &[Canteen]) -> sqlx::Result<Self> {
let canteens = canteens
.iter()
.map(|c| c.get_identifier().to_string())
.collect::<Vec<_>>();
let result = sqlx::query!("SELECT name, array_agg(DISTINCT canteen ORDER BY canteen) AS canteens, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian
FROM meals WHERE date = $1 AND canteen = ANY($2)
GROUP BY name, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian
ORDER BY name",
date, &canteens)
.fetch_all(db)
.await?;
let mut main_dishes = Vec::new();
let mut side_dishes = Vec::new();
let mut desserts = Vec::new();
for row in result {
let dish = Dish {
name: row.name,
image_src: row.image_src,
canteens: row.canteens.map_or_else(Vec::new, |canteens| {
canteens
.iter()
.map(|canteen| Canteen::from_str(canteen).expect("Invalid database entry"))
.collect()
}),
vegan: row.vegan,
vegetarian: row.vegetarian,
price: DishPrices {
students: row.price_students.with_prec(5).with_scale(2),
employees: row.price_employees.with_prec(5).with_scale(2),
guests: row.price_guests.with_prec(5).with_scale(2),
},
};
if row.dish_type == "main" {
main_dishes.push(dish);
} else if row.dish_type == "side" {
side_dishes.push(dish);
} else if row.dish_type == "dessert" {
desserts.push(dish);
}
}
Ok(Self {
date,
main_dishes,
side_dishes,
desserts,
})
}
pub fn get_main_dishes(&self) -> &[Dish] {
&self.main_dishes
}
pub fn get_side_dishes(&self) -> &[Dish] {
&self.side_dishes
}
pub fn get_desserts(&self) -> &[Dish] {
&self.desserts
}
pub fn merged(self, other: Self) -> Self {
let mut main_dishes = self.main_dishes;
let mut side_dishes = self.side_dishes;
let mut desserts = self.desserts;
for dish in other.main_dishes {
if let Some(existing) = main_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
main_dishes.push(dish);
}
}
for dish in other.side_dishes {
if let Some(existing) = side_dishes.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
side_dishes.push(dish);
}
}
for dish in other.desserts {
if let Some(existing) = desserts.iter_mut().find(|d| dish.same_as(d)) {
existing.merge(dish);
} else {
desserts.push(dish);
}
}
Self {
date: self.date,
main_dishes,
side_dishes,
desserts,
}
}
}