combine repositories of web api and scraper
This commit is contained in:
parent
bc88064c82
commit
94b1ffead7
|
@ -1,32 +1,4 @@
|
||||||
# Include any files or directories that you don't want to be copied to your
|
|
||||||
# container here (e.g., local build artifacts, temporary files, etc.).
|
|
||||||
#
|
|
||||||
# For more help, visit the .dockerignore file reference guide at
|
|
||||||
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
|
||||||
|
|
||||||
**/.DS_Store
|
|
||||||
**/.classpath
|
|
||||||
**/.dockerignore
|
|
||||||
**/.env
|
|
||||||
**/.git
|
|
||||||
**/.gitignore
|
|
||||||
**/.project
|
|
||||||
**/.settings
|
|
||||||
**/.toolstarget
|
|
||||||
**/.vs
|
|
||||||
**/.vscode
|
|
||||||
**/*.*proj.user
|
|
||||||
**/*.dbmdl
|
|
||||||
**/*.jfm
|
|
||||||
**/charts
|
|
||||||
**/docker-compose*
|
|
||||||
**/compose*
|
|
||||||
**/Dockerfile*
|
|
||||||
**/node_modules
|
|
||||||
**/npm-debug.log
|
|
||||||
**/secrets.dev.yaml
|
|
||||||
**/values.dev.yaml
|
|
||||||
/bin
|
|
||||||
/target
|
/target
|
||||||
LICENSE
|
/dev-compose.yml
|
||||||
README.md
|
.env
|
||||||
|
.gitignore
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"db_name": "PostgreSQL",
|
||||||
|
"query": "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)\n VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)\n ON CONFLICT (date,canteen,name) DO NOTHING",
|
||||||
|
"describe": {
|
||||||
|
"columns": [],
|
||||||
|
"parameters": {
|
||||||
|
"Left": [
|
||||||
|
"Date",
|
||||||
|
"Text",
|
||||||
|
"Text",
|
||||||
|
"Text",
|
||||||
|
"Text",
|
||||||
|
"Numeric",
|
||||||
|
"Numeric",
|
||||||
|
"Numeric",
|
||||||
|
"Bool",
|
||||||
|
"Bool"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"nullable": []
|
||||||
|
},
|
||||||
|
"hash": "4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c"
|
||||||
|
}
|
|
@ -0,0 +1,71 @@
|
||||||
|
{
|
||||||
|
"db_name": "PostgreSQL",
|
||||||
|
"query": "SELECT name, array_agg(DISTINCT canteen ORDER BY canteen) AS canteens, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian \n FROM meals WHERE date = $1 AND canteen = ANY($2) \n GROUP BY name, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian\n ORDER BY name",
|
||||||
|
"describe": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"ordinal": 0,
|
||||||
|
"name": "name",
|
||||||
|
"type_info": "Text"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 1,
|
||||||
|
"name": "canteens",
|
||||||
|
"type_info": "TextArray"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 2,
|
||||||
|
"name": "dish_type",
|
||||||
|
"type_info": "Text"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 3,
|
||||||
|
"name": "image_src",
|
||||||
|
"type_info": "Text"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 4,
|
||||||
|
"name": "price_students",
|
||||||
|
"type_info": "Numeric"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 5,
|
||||||
|
"name": "price_employees",
|
||||||
|
"type_info": "Numeric"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 6,
|
||||||
|
"name": "price_guests",
|
||||||
|
"type_info": "Numeric"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 7,
|
||||||
|
"name": "vegan",
|
||||||
|
"type_info": "Bool"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 8,
|
||||||
|
"name": "vegetarian",
|
||||||
|
"type_info": "Bool"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": {
|
||||||
|
"Left": [
|
||||||
|
"Date",
|
||||||
|
"TextArray"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"nullable": [
|
||||||
|
false,
|
||||||
|
null,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hash": "b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028"
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
{
|
||||||
|
"db_name": "PostgreSQL",
|
||||||
|
"query": "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
|
||||||
|
"describe": {
|
||||||
|
"columns": [
|
||||||
|
{
|
||||||
|
"ordinal": 0,
|
||||||
|
"name": "date",
|
||||||
|
"type_info": "Date"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ordinal": 1,
|
||||||
|
"name": "canteen",
|
||||||
|
"type_info": "Text"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"parameters": {
|
||||||
|
"Left": [
|
||||||
|
"Date",
|
||||||
|
"Date"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"nullable": [
|
||||||
|
false,
|
||||||
|
false
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"hash": "b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23"
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
34
Cargo.toml
34
Cargo.toml
|
@ -1,27 +1,25 @@
|
||||||
[package]
|
|
||||||
name = "mensa-upb-api"
|
|
||||||
description = "A web scraper api for the canteens of the University of Paderborn"
|
[workspace]
|
||||||
|
members = [
|
||||||
|
"scraper",
|
||||||
|
"web-api",
|
||||||
|
]
|
||||||
|
resolver = "2"
|
||||||
|
|
||||||
|
[workspace.package]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
authors = ["Moritz Hölting"]
|
authors = ["Moritz Hölting"]
|
||||||
repository = "https://github.com/moritz-hoelting/mensa-upb-api"
|
repository = "https://github.com/moritz-hoelting/mensa-upb-api"
|
||||||
publish = false
|
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
version = "0.2.0"
|
|
||||||
edition = "2021"
|
|
||||||
|
|
||||||
[dependencies]
|
[workspace.dependencies]
|
||||||
actix-cors = "0.7.0"
|
|
||||||
actix-governor = { version = "0.7.0", features = ["log"] }
|
|
||||||
actix-web = "4.9.0"
|
|
||||||
anyhow = "1.0.93"
|
anyhow = "1.0.93"
|
||||||
bigdecimal = { version = "0.4.6", features = ["serde"] }
|
chrono = "0.4.38"
|
||||||
chrono = { version = "0.4.38", features = ["serde"] }
|
|
||||||
dotenvy = "0.15.7"
|
dotenvy = "0.15.7"
|
||||||
itertools = "0.13.0"
|
itertools = "0.13.0"
|
||||||
serde = { version = "1.0.215", features = ["derive"] }
|
sqlx = "0.8.2"
|
||||||
serde_json = "1.0.133"
|
strum = "0.26.3"
|
||||||
strum = { version = "0.26.3", features = ["derive"] }
|
tokio = "1.41.1"
|
||||||
sqlx = { version = "0.8.2", features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
|
|
||||||
tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] }
|
|
||||||
tracing = "0.1.40"
|
tracing = "0.1.40"
|
||||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
tracing-subscriber = "0.3.18"
|
68
Dockerfile
68
Dockerfile
|
@ -1,68 +0,0 @@
|
||||||
# syntax=docker/dockerfile:1
|
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# Create a stage for building the application.
|
|
||||||
|
|
||||||
ARG RUST_VERSION=1.79.0
|
|
||||||
ARG APP_NAME=mensa-upb-api
|
|
||||||
FROM rust:${RUST_VERSION}-slim-bullseye AS build
|
|
||||||
ARG APP_NAME
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN apt-get update -y && \
|
|
||||||
apt-get install -y pkg-config make g++ libssl-dev
|
|
||||||
|
|
||||||
# Build the application.
|
|
||||||
# Leverage a cache mount to /usr/local/cargo/registry/
|
|
||||||
# for downloaded dependencies and a cache mount to /app/target/ for
|
|
||||||
# compiled dependencies which will speed up subsequent builds.
|
|
||||||
# Leverage a bind mount to the src directory to avoid having to copy the
|
|
||||||
# source code into the container. Once built, copy the executable to an
|
|
||||||
# output directory before the cache mounted /app/target is unmounted.
|
|
||||||
RUN --mount=type=bind,source=src,target=src \
|
|
||||||
--mount=type=bind,source=Cargo.toml,target=Cargo.toml \
|
|
||||||
--mount=type=bind,source=Cargo.lock,target=Cargo.lock \
|
|
||||||
--mount=type=cache,target=/app/target/ \
|
|
||||||
--mount=type=cache,target=/usr/local/cargo/registry/ \
|
|
||||||
<<EOF
|
|
||||||
set -e
|
|
||||||
cargo build --locked --release
|
|
||||||
cp ./target/release/$APP_NAME /bin/server
|
|
||||||
EOF
|
|
||||||
|
|
||||||
################################################################################
|
|
||||||
# Create a new stage for running the application that contains the minimal
|
|
||||||
# runtime dependencies for the application. This often uses a different base
|
|
||||||
# image from the build stage where the necessary files are copied from the build
|
|
||||||
# stage.
|
|
||||||
FROM debian:bullseye-slim AS final
|
|
||||||
|
|
||||||
# Install ca certificates
|
|
||||||
RUN apt-get update -y && \
|
|
||||||
apt-get install -y ca-certificates
|
|
||||||
|
|
||||||
# Create a non-privileged user that the app will run under.
|
|
||||||
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
|
|
||||||
ARG UID=10001
|
|
||||||
RUN adduser \
|
|
||||||
--disabled-password \
|
|
||||||
--gecos "" \
|
|
||||||
--home "/nonexistent" \
|
|
||||||
--shell "/sbin/nologin" \
|
|
||||||
--no-create-home \
|
|
||||||
--uid "${UID}" \
|
|
||||||
appuser
|
|
||||||
USER appuser
|
|
||||||
|
|
||||||
# Copy the executable from the "build" stage.
|
|
||||||
COPY --from=build /bin/server /bin/
|
|
||||||
|
|
||||||
# Set the environment variable to listen on all interfaces.
|
|
||||||
ENV API_INTERFACE=0.0.0.0
|
|
||||||
|
|
||||||
# Expose the port that the application listens on.
|
|
||||||
EXPOSE 8080
|
|
||||||
|
|
||||||
# What the container should run when it is started.
|
|
||||||
CMD ["/bin/server"]
|
|
34
compose.yml
34
compose.yml
|
@ -1,9 +1,39 @@
|
||||||
services:
|
services:
|
||||||
server:
|
api:
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
target: final
|
dockerfile: ./web-api/Dockerfile
|
||||||
|
image: mensa-upb-api:latest
|
||||||
ports:
|
ports:
|
||||||
- 8080:8080
|
- 8080:8080
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
|
||||||
|
- "RUST_LOG=none,mensa_upb_api=info"
|
||||||
|
- TZ=Europe/Berlin
|
||||||
|
depends_on:
|
||||||
|
- postgres
|
||||||
|
|
||||||
|
scraper:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: ./scraper/Dockerfile
|
||||||
|
image: mensa-upb-scraper:latest
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
|
||||||
|
- "RUST_LOG=none,mensa_upb_scraper=info"
|
||||||
|
- TZ=Europe/Berlin
|
||||||
|
depends_on:
|
||||||
|
- postgres
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
container_name: postgres-mensa-upb
|
||||||
|
image: postgres:17-alpine
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=pguser
|
||||||
|
- POSTGRES_PASSWORD=pgpass
|
||||||
|
- POSTGRES_DB=postgres
|
||||||
|
volumes:
|
||||||
|
- db:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db:
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:17-alpine
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=pguser
|
||||||
|
- POSTGRES_PASSWORD=pgpass
|
||||||
|
- POSTGRES_DB=postgres
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- db:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db:
|
|
@ -0,0 +1,2 @@
|
||||||
|
.env
|
||||||
|
.gitignore
|
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
.env
|
|
@ -0,0 +1,26 @@
|
||||||
|
[package]
|
||||||
|
name = "mensa-upb-scraper"
|
||||||
|
description = "A web scraper for the canteens of the University of Paderborn"
|
||||||
|
license.workspace = true
|
||||||
|
authors.workspace = true
|
||||||
|
repository.workspace = true
|
||||||
|
readme.workspace = true
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = { workspace = true }
|
||||||
|
chrono = { workspace = true }
|
||||||
|
const_format = "0.2.33"
|
||||||
|
dotenvy = { workspace = true }
|
||||||
|
futures = "0.3.31"
|
||||||
|
itertools = { workspace = true }
|
||||||
|
num-bigint = "0.4.6"
|
||||||
|
reqwest = { version = "0.12.9", default-features = false, features = ["charset", "rustls-tls", "http2"] }
|
||||||
|
scraper = "0.21.0"
|
||||||
|
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
|
||||||
|
strum = { workspace = true, features = ["derive"] }
|
||||||
|
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||||
|
tracing = { workspace = true }
|
||||||
|
tracing-subscriber = { workspace = true, features = ["fmt", "std", "env-filter", "registry", "json", "tracing-log"] }
|
|
@ -0,0 +1,28 @@
|
||||||
|
FROM rust:latest AS chef
|
||||||
|
RUN cargo install cargo-chef
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
FROM chef AS planner
|
||||||
|
COPY . .
|
||||||
|
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-scraper --recipe-path recipe.json
|
||||||
|
|
||||||
|
FROM chef AS builder
|
||||||
|
COPY --from=planner /app/recipe.json recipe.json
|
||||||
|
RUN cargo chef cook --bin mensa-upb-scraper --release --recipe-path recipe.json
|
||||||
|
COPY . .
|
||||||
|
RUN OFFLINE=true cargo build --bin mensa-upb-scraper --release
|
||||||
|
|
||||||
|
FROM debian:bookworm-slim AS runtime
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update -y && \
|
||||||
|
apt-get install -y ca-certificates cron
|
||||||
|
|
||||||
|
RUN echo "0 0 * * * /app/mensa-upb-scraper >> /var/log/cron.log 2>&1" > /etc/cron.d/mensa_upb_scraper
|
||||||
|
RUN chmod 0644 /etc/cron.d/mensa_upb_scraper
|
||||||
|
RUN crontab /etc/cron.d/mensa_upb_scraper
|
||||||
|
RUN touch /var/log/cron.log
|
||||||
|
|
||||||
|
COPY --from=builder /app/target/release/mensa-upb-scraper /app/mensa-upb-scraper
|
||||||
|
|
||||||
|
CMD env > /etc/environment && cron && tail -f /var/log/cron.log
|
|
@ -0,0 +1,23 @@
|
||||||
|
services:
|
||||||
|
scraper:
|
||||||
|
build: .
|
||||||
|
image: mensa-upb-scraper:latest
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-scraper/postgres
|
||||||
|
- "RUST_LOG=none,mensa_upb_scraper=info"
|
||||||
|
- TZ=Europe/Berlin
|
||||||
|
depends_on:
|
||||||
|
- postgres
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
container_name: postgres-mensa-upb-scraper
|
||||||
|
image: postgres:17-alpine
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=pguser
|
||||||
|
- POSTGRES_PASSWORD=pgpass
|
||||||
|
- POSTGRES_DB=postgres
|
||||||
|
volumes:
|
||||||
|
- db:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db:
|
|
@ -0,0 +1,64 @@
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use const_format::concatcp;
|
||||||
|
use strum::EnumIter;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)]
|
||||||
|
pub enum Canteen {
|
||||||
|
Forum,
|
||||||
|
Academica,
|
||||||
|
Picknick,
|
||||||
|
BonaVista,
|
||||||
|
GrillCafe,
|
||||||
|
ZM2,
|
||||||
|
Basilica,
|
||||||
|
Atrium,
|
||||||
|
}
|
||||||
|
|
||||||
|
const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/";
|
||||||
|
|
||||||
|
impl Canteen {
|
||||||
|
pub fn get_url(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Self::Forum => concatcp!(POST_URL_BASE, "forum/"),
|
||||||
|
Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"),
|
||||||
|
Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"),
|
||||||
|
Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"),
|
||||||
|
Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"),
|
||||||
|
Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"),
|
||||||
|
Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"),
|
||||||
|
Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_identifier(&self) -> &str {
|
||||||
|
match self {
|
||||||
|
Self::Forum => "forum",
|
||||||
|
Self::Academica => "academica",
|
||||||
|
Self::Picknick => "picknick",
|
||||||
|
Self::BonaVista => "bona-vista",
|
||||||
|
Self::GrillCafe => "grillcafe",
|
||||||
|
Self::ZM2 => "zm2",
|
||||||
|
Self::Basilica => "basilica",
|
||||||
|
Self::Atrium => "atrium",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for Canteen {
|
||||||
|
type Err = String;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"forum" => Ok(Self::Forum),
|
||||||
|
"academica" => Ok(Self::Academica),
|
||||||
|
"picknick" => Ok(Self::Picknick),
|
||||||
|
"bona-vista" => Ok(Self::BonaVista),
|
||||||
|
"grillcafe" => Ok(Self::GrillCafe),
|
||||||
|
"zm2" => Ok(Self::ZM2),
|
||||||
|
"basilica" => Ok(Self::Basilica),
|
||||||
|
"atrium" => Ok(Self::Atrium),
|
||||||
|
invalid => Err(format!("Invalid canteen identifier: {}", invalid)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,145 @@
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
use scraper::ElementRef;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub struct Dish {
|
||||||
|
name: String,
|
||||||
|
image_src: Option<String>,
|
||||||
|
price_students: Option<String>,
|
||||||
|
price_employees: Option<String>,
|
||||||
|
price_guests: Option<String>,
|
||||||
|
extras: Vec<String>,
|
||||||
|
dish_type: DishType,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Dish {
|
||||||
|
pub fn get_name(&self) -> &str {
|
||||||
|
&self.name
|
||||||
|
}
|
||||||
|
pub fn get_price_students(&self) -> Option<&str> {
|
||||||
|
self.price_students.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_price_employees(&self) -> Option<&str> {
|
||||||
|
self.price_employees.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_price_guests(&self) -> Option<&str> {
|
||||||
|
self.price_guests.as_deref()
|
||||||
|
}
|
||||||
|
pub fn get_image_src(&self) -> Option<&str> {
|
||||||
|
self.image_src.as_deref()
|
||||||
|
}
|
||||||
|
pub fn is_vegan(&self) -> bool {
|
||||||
|
self.extras.contains(&"vegan".to_string())
|
||||||
|
}
|
||||||
|
pub fn is_vegetarian(&self) -> bool {
|
||||||
|
self.extras.contains(&"vegetarian".to_string())
|
||||||
|
}
|
||||||
|
pub fn get_extras(&self) -> &[String] {
|
||||||
|
&self.extras
|
||||||
|
}
|
||||||
|
pub fn get_type(&self) -> DishType {
|
||||||
|
self.dish_type
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn same_as(&self, other: &Self) -> bool {
|
||||||
|
self.name == other.name
|
||||||
|
&& self.price_employees == other.price_employees
|
||||||
|
&& self.price_guests == other.price_guests
|
||||||
|
&& self.price_students == other.price_students
|
||||||
|
&& self.extras.iter().sorted().collect_vec()
|
||||||
|
== self.extras.iter().sorted().collect_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_element(element: ElementRef, dish_type: DishType) -> Option<Self> {
|
||||||
|
let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
|
||||||
|
let name = element
|
||||||
|
.select(&html_name_selector)
|
||||||
|
.next()?
|
||||||
|
.text()
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("")
|
||||||
|
.trim()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
let img_selector = scraper::Selector::parse(".img img").ok()?;
|
||||||
|
let img_src = element.select(&img_selector).next().and_then(|el| {
|
||||||
|
el.value()
|
||||||
|
.attr("src")
|
||||||
|
.map(|img_src_path| format!("https://www.studierendenwerk-pb.de/{}", img_src_path))
|
||||||
|
});
|
||||||
|
|
||||||
|
let html_price_selector = scraper::Selector::parse(".desc .price").ok()?;
|
||||||
|
let mut prices = element
|
||||||
|
.select(&html_price_selector)
|
||||||
|
.filter_map(|price| {
|
||||||
|
let price_for = price.first_child().and_then(|strong| {
|
||||||
|
strong.first_child().and_then(|text_element| {
|
||||||
|
text_element
|
||||||
|
.value()
|
||||||
|
.as_text()
|
||||||
|
.map(|text| text.trim().trim_end_matches(':').to_string())
|
||||||
|
})
|
||||||
|
});
|
||||||
|
let price_value = price.last_child().and_then(|text_element| {
|
||||||
|
text_element
|
||||||
|
.value()
|
||||||
|
.as_text()
|
||||||
|
.map(|text| text.trim().to_string())
|
||||||
|
});
|
||||||
|
price_for
|
||||||
|
.and_then(|price_for| price_value.map(|price_value| (price_for, price_value)))
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?;
|
||||||
|
let extras = element
|
||||||
|
.select(&html_extras_selector)
|
||||||
|
.filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
name,
|
||||||
|
image_src: img_src,
|
||||||
|
price_students: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Studierende")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
price_employees: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Bedienstete")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
price_guests: prices
|
||||||
|
.iter_mut()
|
||||||
|
.find(|(price_for, _)| price_for == "Gäste")
|
||||||
|
.map(|(_, price)| std::mem::take(price)),
|
||||||
|
extras,
|
||||||
|
dish_type,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for Dish {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||||
|
self.name.partial_cmp(&other.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The menu section a dish was listed in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DishType {
    Main,
    Side,
    Dessert,
}

/// Formats the dish type as its lowercase identifier
/// (`"main"`, `"side"` or `"dessert"`).
impl Display for DishType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            Self::Main => "main",
            Self::Side => "side",
            Self::Dessert => "dessert",
        };
        f.write_str(s)
    }
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
mod canteen;
|
||||||
|
mod dish;
|
||||||
|
mod menu;
|
||||||
|
pub mod util;
|
||||||
|
|
||||||
|
use std::{error::Error, fmt::Display};
|
||||||
|
|
||||||
|
pub use canteen::Canteen;
|
||||||
|
pub use dish::Dish;
|
||||||
|
|
||||||
|
/// Minimal crate-internal error type that carries only a message.
#[derive(Debug, Clone)]
struct CustomError(String);

/// Displays the wrapped message verbatim.
impl Display for CustomError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

impl Error for CustomError {}

/// Builds an error from a borrowed message by copying it.
impl From<&str> for CustomError {
    fn from(s: &str) -> Self {
        CustomError(s.to_string())
    }
}

/// Builds an error from an owned message without copying.
impl From<String> for CustomError {
    fn from(s: String) -> Self {
        CustomError(s)
    }
}
|
|
@ -0,0 +1,65 @@
|
||||||
|
use std::{collections::HashSet, env};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use chrono::{Duration, Utc};
|
||||||
|
use itertools::Itertools as _;
|
||||||
|
use mensa_upb_scraper::{util, Canteen};
|
||||||
|
use strum::IntoEnumIterator;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
dotenvy::dotenv().ok();
|
||||||
|
|
||||||
|
let db = util::get_db()?;
|
||||||
|
|
||||||
|
tracing_subscriber::fmt::init();
|
||||||
|
|
||||||
|
sqlx::migrate!("../migrations").run(&db).await?;
|
||||||
|
|
||||||
|
tracing::info!("Starting up...");
|
||||||
|
|
||||||
|
let start_date = Utc::now().date_naive();
|
||||||
|
let end_date = (Utc::now() + Duration::days(6)).date_naive();
|
||||||
|
|
||||||
|
let already_scraped = sqlx::query!(
|
||||||
|
"SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
|
||||||
|
start_date,
|
||||||
|
end_date
|
||||||
|
)
|
||||||
|
.fetch_all(&db)
|
||||||
|
.await?
|
||||||
|
.into_iter()
|
||||||
|
.map(|r| {
|
||||||
|
(
|
||||||
|
r.date,
|
||||||
|
r.canteen.parse::<Canteen>().expect("Invalid db entry"),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect::<HashSet<_>>();
|
||||||
|
|
||||||
|
let filter_canteens = env::var("FILTER_CANTEENS")
|
||||||
|
.ok()
|
||||||
|
.map(|s| {
|
||||||
|
s.split(',')
|
||||||
|
.filter_map(|el| el.parse::<Canteen>().ok())
|
||||||
|
.collect::<HashSet<_>>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let date_canteen_combinations = (0..7)
|
||||||
|
.map(|d| (Utc::now() + Duration::days(d)).date_naive())
|
||||||
|
.cartesian_product(Canteen::iter())
|
||||||
|
.filter(|entry| !filter_canteens.contains(&entry.1) && !already_scraped.contains(entry))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
util::async_for_each(&date_canteen_combinations, |(date, canteen, menu)| {
|
||||||
|
let db = db.clone();
|
||||||
|
async move {
|
||||||
|
util::add_menu_to_db(&db, &date, canteen, menu).await;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
tracing::info!("Finished scraping menu");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -0,0 +1,56 @@
|
||||||
|
use anyhow::Result;
|
||||||
|
use chrono::NaiveDate;
|
||||||
|
|
||||||
|
use crate::{dish::DishType, Canteen, CustomError, Dish};
|
||||||
|
|
||||||
|
#[tracing::instrument]
|
||||||
|
pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>> {
|
||||||
|
tracing::debug!("Starting scraping");
|
||||||
|
|
||||||
|
let url = canteen.get_url();
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let request_builder = client.post(url).query(&[(
|
||||||
|
"tx_pamensa_mensa[date]",
|
||||||
|
date.format("%Y-%m-%d").to_string(),
|
||||||
|
)]);
|
||||||
|
let response = request_builder.send().await?;
|
||||||
|
let html_content = response.text().await?;
|
||||||
|
|
||||||
|
let document = scraper::Html::parse_document(&html_content);
|
||||||
|
|
||||||
|
let html_main_dishes_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||||
|
let html_main_dishes = document.select(&html_main_dishes_selector);
|
||||||
|
let main_dishes = html_main_dishes
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, DishType::Main))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_side_dishes_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||||
|
let html_side_dishes = document.select(&html_side_dishes_selector);
|
||||||
|
let side_dishes = html_side_dishes
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, DishType::Side))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let html_desserts_selector = scraper::Selector::parse(
|
||||||
|
"table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
|
||||||
|
)
|
||||||
|
.map_err(|_| CustomError::from("Failed to parse selector"))?;
|
||||||
|
let html_desserts = document.select(&html_desserts_selector);
|
||||||
|
let desserts = html_desserts
|
||||||
|
.filter_map(|dish| Dish::from_element(dish, DishType::Dessert))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let mut res = Vec::new();
|
||||||
|
res.extend(main_dishes);
|
||||||
|
res.extend(side_dishes);
|
||||||
|
res.extend(desserts);
|
||||||
|
|
||||||
|
tracing::debug!("Finished scraping");
|
||||||
|
|
||||||
|
Ok(res)
|
||||||
|
}
|
|
@ -0,0 +1,64 @@
|
||||||
|
use std::{env, future::Future};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use chrono::NaiveDate;
|
||||||
|
use futures::StreamExt as _;
|
||||||
|
use num_bigint::BigInt;
|
||||||
|
use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool};
|
||||||
|
|
||||||
|
use crate::{menu::scrape_menu, Canteen, Dish};
|
||||||
|
|
||||||
|
pub async fn async_for_each<F, Fut>(date_canteen_combinations: &[(NaiveDate, Canteen)], f: F)
|
||||||
|
where
|
||||||
|
F: FnMut((NaiveDate, Canteen, Vec<Dish>)) -> Fut,
|
||||||
|
Fut: Future<Output = ()>,
|
||||||
|
{
|
||||||
|
futures::stream::iter(date_canteen_combinations)
|
||||||
|
.then(|(date, canteen)| async move { (*date, *canteen, scrape_menu(date, *canteen).await) })
|
||||||
|
.filter_map(|(date, canteen, menu)| async move { menu.ok().map(|menu| (date, canteen, menu)) })
|
||||||
|
.for_each(f)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_db() -> Result<PgPool> {
|
||||||
|
Ok(PgPoolOptions::new()
|
||||||
|
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(skip(db))]
|
||||||
|
pub async fn add_meal_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, dish: &Dish) -> Result<()> {
|
||||||
|
let vegan = dish.is_vegan();
|
||||||
|
|
||||||
|
sqlx::query!(
|
||||||
|
"INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)
|
||||||
|
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
|
||||||
|
ON CONFLICT (date,canteen,name) DO NOTHING",
|
||||||
|
date, canteen.get_identifier(), dish.get_name(),
|
||||||
|
dish.get_type().to_string(), dish.get_image_src(),
|
||||||
|
price_to_bigdecimal(dish.get_price_students()),
|
||||||
|
price_to_bigdecimal(dish.get_price_employees()),
|
||||||
|
price_to_bigdecimal(dish.get_price_guests()),
|
||||||
|
vegan, vegan || dish.is_vegetarian()
|
||||||
|
).execute(db).await.inspect_err(|e| {
|
||||||
|
tracing::error!("error during database insert: {}", e);
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tracing::trace!("Insert to DB successfull");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn add_menu_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, menu: Vec<Dish>) {
|
||||||
|
futures::stream::iter(menu)
|
||||||
|
.for_each(|dish| async move {
|
||||||
|
if !dish.get_name().is_empty() {
|
||||||
|
add_meal_to_db(db, date, canteen, &dish).await.ok();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
|
||||||
|
s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok())
|
||||||
|
.unwrap_or_else(|| BigDecimal::new(BigInt::from(99999), 2))
|
||||||
|
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
# Include any files or directories that you don't want to be copied to your
|
||||||
|
# container here (e.g., local build artifacts, temporary files, etc.).
|
||||||
|
#
|
||||||
|
# For more help, visit the .dockerignore file reference guide at
|
||||||
|
# https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
||||||
|
|
||||||
|
**/.DS_Store
|
||||||
|
**/.classpath
|
||||||
|
**/.dockerignore
|
||||||
|
**/.env
|
||||||
|
**/.git
|
||||||
|
**/.gitignore
|
||||||
|
**/.project
|
||||||
|
**/.settings
|
||||||
|
**/.toolstarget
|
||||||
|
**/.vs
|
||||||
|
**/.vscode
|
||||||
|
**/*.*proj.user
|
||||||
|
**/*.dbmdl
|
||||||
|
**/*.jfm
|
||||||
|
**/charts
|
||||||
|
**/docker-compose*
|
||||||
|
**/compose*
|
||||||
|
**/Dockerfile*
|
||||||
|
**/node_modules
|
||||||
|
**/npm-debug.log
|
||||||
|
**/secrets.dev.yaml
|
||||||
|
**/values.dev.yaml
|
||||||
|
/bin
|
||||||
|
/target
|
||||||
|
LICENSE
|
||||||
|
README.md
|
|
@ -0,0 +1,27 @@
|
||||||
|
[package]
|
||||||
|
name = "mensa-upb-api"
|
||||||
|
description = "A web api for a local database of the canteens of the University of Paderborn"
|
||||||
|
license.workspace = true
|
||||||
|
authors.workspace = true
|
||||||
|
repository.workspace = true
|
||||||
|
readme.workspace = true
|
||||||
|
version = "0.2.0"
|
||||||
|
edition = "2021"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
actix-cors = "0.7.0"
|
||||||
|
actix-governor = { version = "0.7.0", features = ["log"] }
|
||||||
|
actix-web = "4.9.0"
|
||||||
|
anyhow = { workspace = true }
|
||||||
|
bigdecimal = { version = "0.4.6", features = ["serde"] }
|
||||||
|
chrono = { workspace = true, features = ["serde"] }
|
||||||
|
dotenvy = { workspace = true }
|
||||||
|
itertools = { workspace = true }
|
||||||
|
serde = { version = "1.0.215", features = ["derive"] }
|
||||||
|
serde_json = "1.0.133"
|
||||||
|
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
|
||||||
|
strum = { workspace = true, features = ["derive"] }
|
||||||
|
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||||
|
tracing = "0.1.40"
|
||||||
|
tracing-subscriber = { workspace = true, features = ["env-filter"] }
|
|
@ -0,0 +1,36 @@
|
||||||
|
|
||||||
|
FROM rust:latest AS chef
|
||||||
|
RUN cargo install cargo-chef
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
FROM chef AS planner
|
||||||
|
COPY . .
|
||||||
|
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-api --recipe-path recipe.json
|
||||||
|
|
||||||
|
FROM chef AS builder
|
||||||
|
COPY --from=planner /app/recipe.json recipe.json
|
||||||
|
RUN cargo chef cook --bin mensa-upb-api --release --recipe-path recipe.json
|
||||||
|
COPY . .
|
||||||
|
RUN OFFLINE=true cargo build --bin mensa-upb-api --release
|
||||||
|
|
||||||
|
FROM debian:bookworm-slim AS runtime
|
||||||
|
|
||||||
|
ARG UID=10001
|
||||||
|
RUN adduser \
|
||||||
|
--disabled-password \
|
||||||
|
--gecos "" \
|
||||||
|
--home "/nonexistent" \
|
||||||
|
--shell "/sbin/nologin" \
|
||||||
|
--no-create-home \
|
||||||
|
--uid "${UID}" \
|
||||||
|
appuser
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
COPY --from=builder /app/target/release/mensa-upb-api /bin/mensa-upb-api
|
||||||
|
|
||||||
|
ENV API_INTERFACE=0.0.0.0
|
||||||
|
|
||||||
|
EXPOSE 8080
|
||||||
|
|
||||||
|
# What the container should run when it is started.
|
||||||
|
CMD ["/bin/mensa-upb-api"]
|
|
@ -0,0 +1,27 @@
|
||||||
|
services:
|
||||||
|
api:
|
||||||
|
build: .
|
||||||
|
image: mensa-upb-api:latest
|
||||||
|
ports:
|
||||||
|
- 8080:8080
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-api/postgres
|
||||||
|
- "RUST_LOG=none,mensa_upb_api=info"
|
||||||
|
- TZ=Europe/Berlin
|
||||||
|
depends_on:
|
||||||
|
- postgres
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
container_name: postgres-mensa-upb-api
|
||||||
|
image: postgres:17-alpine
|
||||||
|
environment:
|
||||||
|
- POSTGRES_USER=pguser
|
||||||
|
- POSTGRES_PASSWORD=pgpass
|
||||||
|
- POSTGRES_DB=postgres
|
||||||
|
volumes:
|
||||||
|
- db:/var/lib/postgresql/data
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
db:
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,8 @@ async fn main() -> Result<()> {
|
||||||
let db = PgPoolOptions::new()
|
let db = PgPoolOptions::new()
|
||||||
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?;
|
.connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?;
|
||||||
|
|
||||||
|
sqlx::migrate!("../migrations").run(&db).await?;
|
||||||
|
|
||||||
let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
|
let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
|
||||||
let port = env::var("API_PORT")
|
let port = env::var("API_PORT")
|
||||||
.ok()
|
.ok()
|
Loading…
Reference in New Issue