combine repositories of web api and scraper
parent bc88064c82
commit 94b1ffead7

.dockerignore
@@ -1,32 +1,4 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

**/.DS_Store
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/secrets.dev.yaml
**/values.dev.yaml
/bin
/target
LICENSE
README.md
/dev-compose.yml
.env
.gitignore

.sqlx/query-4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c.json
@@ -0,0 +1,23 @@
{
  "db_name": "PostgreSQL",
  "query": "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)\n VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)\n ON CONFLICT (date,canteen,name) DO NOTHING",
  "describe": {
    "columns": [],
    "parameters": {
      "Left": [
        "Date",
        "Text",
        "Text",
        "Text",
        "Text",
        "Numeric",
        "Numeric",
        "Numeric",
        "Bool",
        "Bool"
      ]
    },
    "nullable": []
  },
  "hash": "4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c"
}

.sqlx/query-b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028.json
@@ -0,0 +1,71 @@
{
  "db_name": "PostgreSQL",
  "query": "SELECT name, array_agg(DISTINCT canteen ORDER BY canteen) AS canteens, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian \n FROM meals WHERE date = $1 AND canteen = ANY($2) \n GROUP BY name, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian\n ORDER BY name",
  "describe": {
    "columns": [
      {
        "ordinal": 0,
        "name": "name",
        "type_info": "Text"
      },
      {
        "ordinal": 1,
        "name": "canteens",
        "type_info": "TextArray"
      },
      {
        "ordinal": 2,
        "name": "dish_type",
        "type_info": "Text"
      },
      {
        "ordinal": 3,
        "name": "image_src",
        "type_info": "Text"
      },
      {
        "ordinal": 4,
        "name": "price_students",
        "type_info": "Numeric"
      },
      {
        "ordinal": 5,
        "name": "price_employees",
        "type_info": "Numeric"
      },
      {
        "ordinal": 6,
        "name": "price_guests",
        "type_info": "Numeric"
      },
      {
        "ordinal": 7,
        "name": "vegan",
        "type_info": "Bool"
      },
      {
        "ordinal": 8,
        "name": "vegetarian",
        "type_info": "Bool"
      }
    ],
    "parameters": {
      "Left": [
        "Date",
        "TextArray"
      ]
    },
    "nullable": [
      false,
      null,
      false,
      true,
      false,
      false,
      false,
      false,
      false
    ]
  },
  "hash": "b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028"
}

.sqlx/query-b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23.json
@@ -0,0 +1,29 @@
{
  "db_name": "PostgreSQL",
  "query": "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
  "describe": {
    "columns": [
      {
        "ordinal": 0,
        "name": "date",
        "type_info": "Date"
      },
      {
        "ordinal": 1,
        "name": "canteen",
        "type_info": "Text"
      }
    ],
    "parameters": {
      "Left": [
        "Date",
        "Date"
      ]
    },
    "nullable": [
      false,
      false
    ]
  },
  "hash": "b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23"
}
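
The three JSON files above are sqlx's offline query cache, generated by `cargo sqlx prepare`: each one records the parameter and column types of a single `query!` invocation so the macro can type-check against the cache (when SQLX_OFFLINE=true is set) instead of a live database. A minimal sketch of the kind of call the third cache entry corresponds to; the pool setup and the concrete dates here are illustrative assumptions, not part of the commit:

    use sqlx::postgres::PgPoolOptions;

    async fn scraped_pairs() -> anyhow::Result<()> {
        // Checked at compile time against the cached describe-data above
        // when SQLX_OFFLINE=true, or against the live schema otherwise.
        let db = PgPoolOptions::new().connect_lazy(&std::env::var("DATABASE_URL")?)?;
        let rows = sqlx::query!(
            "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
            chrono::NaiveDate::from_ymd_opt(2024, 11, 18).unwrap(), // illustrative dates
            chrono::NaiveDate::from_ymd_opt(2024, 11, 24).unwrap(),
        )
        .fetch_all(&db)
        .await?;
        println!("{} (date, canteen) pairs already scraped", rows.len());
        Ok(())
    }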

File diff suppressed because it is too large

Cargo.toml
@@ -1,27 +1,25 @@
[package]
name = "mensa-upb-api"
description = "A web scraper api for the canteens of the University of Paderborn"

[workspace]
members = [
    "scraper",
    "web-api",
]
resolver = "2"

[workspace.package]
license = "MIT"
authors = ["Moritz Hölting"]
repository = "https://github.com/moritz-hoelting/mensa-upb-api"
publish = false
readme = "README.md"
version = "0.2.0"
edition = "2021"

[dependencies]
actix-cors = "0.7.0"
actix-governor = { version = "0.7.0", features = ["log"] }
actix-web = "4.9.0"
[workspace.dependencies]
anyhow = "1.0.93"
bigdecimal = { version = "0.4.6", features = ["serde"] }
chrono = { version = "0.4.38", features = ["serde"] }
chrono = "0.4.38"
dotenvy = "0.15.7"
itertools = "0.13.0"
serde = { version = "1.0.215", features = ["derive"] }
serde_json = "1.0.133"
strum = { version = "0.26.3", features = ["derive"] }
sqlx = { version = "0.8.2", features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] }
sqlx = "0.8.2"
strum = "0.26.3"
tokio = "1.41.1"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-subscriber = "0.3.18"

Dockerfile
@@ -1,68 +0,0 @@
# syntax=docker/dockerfile:1

################################################################################
# Create a stage for building the application.

ARG RUST_VERSION=1.79.0
ARG APP_NAME=mensa-upb-api
FROM rust:${RUST_VERSION}-slim-bullseye AS build
ARG APP_NAME
WORKDIR /app

RUN apt-get update -y && \
    apt-get install -y pkg-config make g++ libssl-dev

# Build the application.
# Leverage a cache mount to /usr/local/cargo/registry/
# for downloaded dependencies and a cache mount to /app/target/ for
# compiled dependencies which will speed up subsequent builds.
# Leverage a bind mount to the src directory to avoid having to copy the
# source code into the container. Once built, copy the executable to an
# output directory before the cache mounted /app/target is unmounted.
RUN --mount=type=bind,source=src,target=src \
    --mount=type=bind,source=Cargo.toml,target=Cargo.toml \
    --mount=type=bind,source=Cargo.lock,target=Cargo.lock \
    --mount=type=cache,target=/app/target/ \
    --mount=type=cache,target=/usr/local/cargo/registry/ \
    <<EOF
set -e
cargo build --locked --release
cp ./target/release/$APP_NAME /bin/server
EOF

################################################################################
# Create a new stage for running the application that contains the minimal
# runtime dependencies for the application. This often uses a different base
# image from the build stage where the necessary files are copied from the build
# stage.
FROM debian:bullseye-slim AS final

# Install ca certificates
RUN apt-get update -y && \
    apt-get install -y ca-certificates

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#user
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser
USER appuser

# Copy the executable from the "build" stage.
COPY --from=build /bin/server /bin/

# Set the environment variable to listen on all interfaces.
ENV API_INTERFACE=0.0.0.0

# Expose the port that the application listens on.
EXPOSE 8080

# What the container should run when it is started.
CMD ["/bin/server"]

compose.yml
@@ -1,9 +1,39 @@
services:
  server:
  api:
    build:
      context: .
      target: final
      dockerfile: ./web-api/Dockerfile
    image: mensa-upb-api:latest
    ports:
      - 8080:8080
    environment:
      - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
      - "RUST_LOG=none,mensa_upb_api=info"
      - TZ=Europe/Berlin
    depends_on:
      - postgres

  scraper:
    build:
      context: .
      dockerfile: ./scraper/Dockerfile
    image: mensa-upb-scraper:latest
    environment:
      - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb/postgres
      - "RUST_LOG=none,mensa_upb_scraper=info"
      - TZ=Europe/Berlin
    depends_on:
      - postgres

  postgres:
    container_name: postgres-mensa-upb
    image: postgres:17-alpine
    environment:
      - POSTGRES_USER=pguser
      - POSTGRES_PASSWORD=pgpass
      - POSTGRES_DB=postgres
    volumes:
      - db:/var/lib/postgresql/data

volumes:
  db:

dev-compose.yml
@@ -0,0 +1,14 @@
services:
  postgres:
    image: postgres:17-alpine
    environment:
      - POSTGRES_USER=pguser
      - POSTGRES_PASSWORD=pgpass
      - POSTGRES_DB=postgres
    ports:
      - "5432:5432"
    volumes:
      - db:/var/lib/postgresql/data

volumes:
  db:

scraper/.dockerignore
@@ -0,0 +1,2 @@
.env
.gitignore

scraper/.gitignore
@@ -0,0 +1,2 @@
/target
.env

scraper/Cargo.toml
@@ -0,0 +1,26 @@
[package]
name = "mensa-upb-scraper"
description = "A web scraper for the canteens of the University of Paderborn"
license.workspace = true
authors.workspace = true
repository.workspace = true
readme.workspace = true
version = "0.1.0"
edition = "2021"
publish = false

[dependencies]
anyhow = { workspace = true }
chrono = { workspace = true }
const_format = "0.2.33"
dotenvy = { workspace = true }
futures = "0.3.31"
itertools = { workspace = true }
num-bigint = "0.4.6"
reqwest = { version = "0.12.9", default-features = false, features = ["charset", "rustls-tls", "http2"] }
scraper = "0.21.0"
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
strum = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tracing = { workspace = true }
tracing-subscriber = { workspace = true, features = ["fmt", "std", "env-filter", "registry", "json", "tracing-log"] }

scraper/Dockerfile
@@ -0,0 +1,28 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
WORKDIR /app

FROM chef AS planner
COPY . .
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-scraper --recipe-path recipe.json

FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --bin mensa-upb-scraper --release --recipe-path recipe.json
COPY . .
RUN OFFLINE=true cargo build --bin mensa-upb-scraper --release

FROM debian:bookworm-slim AS runtime
WORKDIR /app

RUN apt-get update -y && \
    apt-get install -y ca-certificates cron

RUN echo "0 0 * * * /app/mensa-upb-scraper >> /var/log/cron.log 2>&1" > /etc/cron.d/mensa_upb_scraper
RUN chmod 0644 /etc/cron.d/mensa_upb_scraper
RUN crontab /etc/cron.d/mensa_upb_scraper
RUN touch /var/log/cron.log

COPY --from=builder /app/target/release/mensa-upb-scraper /app/mensa-upb-scraper

CMD env > /etc/environment && cron && tail -f /var/log/cron.log

scraper/compose.yml
@@ -0,0 +1,23 @@
services:
  scraper:
    build: .
    image: mensa-upb-scraper:latest
    environment:
      - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-scraper/postgres
      - "RUST_LOG=none,mensa_upb_scraper=info"
      - TZ=Europe/Berlin
    depends_on:
      - postgres

  postgres:
    container_name: postgres-mensa-upb-scraper
    image: postgres:17-alpine
    environment:
      - POSTGRES_USER=pguser
      - POSTGRES_PASSWORD=pgpass
      - POSTGRES_DB=postgres
    volumes:
      - db:/var/lib/postgresql/data

volumes:
  db:

scraper/src/canteen.rs
@@ -0,0 +1,64 @@
use std::str::FromStr;

use const_format::concatcp;
use strum::EnumIter;

#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)]
pub enum Canteen {
    Forum,
    Academica,
    Picknick,
    BonaVista,
    GrillCafe,
    ZM2,
    Basilica,
    Atrium,
}

const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/";

impl Canteen {
    pub fn get_url(&self) -> &str {
        match self {
            Self::Forum => concatcp!(POST_URL_BASE, "forum/"),
            Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"),
            Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"),
            Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"),
            Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"),
            Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"),
            Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"),
            Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"),
        }
    }

    pub fn get_identifier(&self) -> &str {
        match self {
            Self::Forum => "forum",
            Self::Academica => "academica",
            Self::Picknick => "picknick",
            Self::BonaVista => "bona-vista",
            Self::GrillCafe => "grillcafe",
            Self::ZM2 => "zm2",
            Self::Basilica => "basilica",
            Self::Atrium => "atrium",
        }
    }
}

impl FromStr for Canteen {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "forum" => Ok(Self::Forum),
            "academica" => Ok(Self::Academica),
            "picknick" => Ok(Self::Picknick),
            "bona-vista" => Ok(Self::BonaVista),
            "grillcafe" => Ok(Self::GrillCafe),
            "zm2" => Ok(Self::ZM2),
            "basilica" => Ok(Self::Basilica),
            "atrium" => Ok(Self::Atrium),
            invalid => Err(format!("Invalid canteen identifier: {}", invalid)),
        }
    }
}
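
A hypothetical usage sketch (not part of the commit) showing how the strum-derived iterator and the FromStr impl above fit together: identifiers round-trip, which is what lets the scraper store get_identifier() strings in the database and parse them back.

    use std::str::FromStr;
    use strum::IntoEnumIterator;
    use mensa_upb_scraper::Canteen;

    fn main() {
        for canteen in Canteen::iter() {
            // Identifiers round-trip through FromStr.
            assert_eq!(Canteen::from_str(canteen.get_identifier()), Ok(canteen));
            println!("{} -> {}", canteen.get_identifier(), canteen.get_url());
        }
        // Unknown identifiers are rejected with a descriptive error.
        assert!(Canteen::from_str("mensa-nirgendwo").is_err());
    }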

scraper/src/dish.rs
@@ -0,0 +1,145 @@
use std::fmt::Display;

use itertools::Itertools;
use scraper::ElementRef;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Dish {
    name: String,
    image_src: Option<String>,
    price_students: Option<String>,
    price_employees: Option<String>,
    price_guests: Option<String>,
    extras: Vec<String>,
    dish_type: DishType,
}

impl Dish {
    pub fn get_name(&self) -> &str {
        &self.name
    }
    pub fn get_price_students(&self) -> Option<&str> {
        self.price_students.as_deref()
    }
    pub fn get_price_employees(&self) -> Option<&str> {
        self.price_employees.as_deref()
    }
    pub fn get_price_guests(&self) -> Option<&str> {
        self.price_guests.as_deref()
    }
    pub fn get_image_src(&self) -> Option<&str> {
        self.image_src.as_deref()
    }
    pub fn is_vegan(&self) -> bool {
        self.extras.contains(&"vegan".to_string())
    }
    pub fn is_vegetarian(&self) -> bool {
        self.extras.contains(&"vegetarian".to_string())
    }
    pub fn get_extras(&self) -> &[String] {
        &self.extras
    }
    pub fn get_type(&self) -> DishType {
        self.dish_type
    }

    pub fn same_as(&self, other: &Self) -> bool {
        self.name == other.name
            && self.price_employees == other.price_employees
            && self.price_guests == other.price_guests
            && self.price_students == other.price_students
            && self.extras.iter().sorted().collect_vec()
                == other.extras.iter().sorted().collect_vec()
    }

    pub fn from_element(element: ElementRef, dish_type: DishType) -> Option<Self> {
        let html_name_selector = scraper::Selector::parse(".desc h4").ok()?;
        let name = element
            .select(&html_name_selector)
            .next()?
            .text()
            .collect::<Vec<_>>()
            .join("")
            .trim()
            .to_string();

        let img_selector = scraper::Selector::parse(".img img").ok()?;
        let img_src = element.select(&img_selector).next().and_then(|el| {
            el.value()
                .attr("src")
                .map(|img_src_path| format!("https://www.studierendenwerk-pb.de/{}", img_src_path))
        });

        let html_price_selector = scraper::Selector::parse(".desc .price").ok()?;
        let mut prices = element
            .select(&html_price_selector)
            .filter_map(|price| {
                let price_for = price.first_child().and_then(|strong| {
                    strong.first_child().and_then(|text_element| {
                        text_element
                            .value()
                            .as_text()
                            .map(|text| text.trim().trim_end_matches(':').to_string())
                    })
                });
                let price_value = price.last_child().and_then(|text_element| {
                    text_element
                        .value()
                        .as_text()
                        .map(|text| text.trim().to_string())
                });
                price_for
                    .and_then(|price_for| price_value.map(|price_value| (price_for, price_value)))
            })
            .collect::<Vec<_>>();

        let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?;
        let extras = element
            .select(&html_extras_selector)
            .filter_map(|extra| extra.value().attr("title").map(|title| title.to_string()))
            .collect::<Vec<_>>();

        Some(Self {
            name,
            image_src: img_src,
            price_students: prices
                .iter_mut()
                .find(|(price_for, _)| price_for == "Studierende")
                .map(|(_, price)| std::mem::take(price)),
            price_employees: prices
                .iter_mut()
                .find(|(price_for, _)| price_for == "Bedienstete")
                .map(|(_, price)| std::mem::take(price)),
            price_guests: prices
                .iter_mut()
                .find(|(price_for, _)| price_for == "Gäste")
                .map(|(_, price)| std::mem::take(price)),
            extras,
            dish_type,
        })
    }
}

impl PartialOrd for Dish {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.name.partial_cmp(&other.name)
    }
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DishType {
    Main,
    Side,
    Dessert,
}

impl Display for DishType {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            Self::Main => "main",
            Self::Side => "side",
            Self::Dessert => "dessert",
        };
        f.write_str(s)
    }
}
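
A hypothetical unit test (an assumption, not in the commit) that could sit at the bottom of this file, showing the markup shape the selectors in from_element expect: .desc h4 for the name, .desc .price rows of strong-label plus value, and title attributes under .desc .buttons for extras.

    #[cfg(test)]
    mod tests {
        use super::{Dish, DishType};
        use scraper::{Html, Selector};

        #[test]
        fn parses_minimal_dish_row() {
            // Trimmed-down fragment of the menu markup the selectors expect.
            let html = Html::parse_fragment(
                r#"<div class="row"><div class="desc">
                     <h4>Beispielgericht</h4>
                     <div class="price"><strong>Studierende:</strong> 2,50 €</div>
                     <div class="buttons"><span title="vegan"></span></div>
                   </div></div>"#,
            );
            let row = Selector::parse("div.row").unwrap();
            let element = html.select(&row).next().unwrap();

            let dish = Dish::from_element(element, DishType::Main).unwrap();
            assert_eq!(dish.get_name(), "Beispielgericht");
            assert_eq!(dish.get_price_students(), Some("2,50 €"));
            assert!(dish.is_vegan());
            assert!(dish.get_image_src().is_none());
        }
    }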

scraper/src/lib.rs
@@ -0,0 +1,32 @@
mod canteen;
mod dish;
mod menu;
pub mod util;

use std::{error::Error, fmt::Display};

pub use canteen::Canteen;
pub use dish::Dish;

#[derive(Debug, Clone)]
struct CustomError(String);

impl Display for CustomError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl Error for CustomError {}

impl From<&str> for CustomError {
    fn from(s: &str) -> Self {
        CustomError(s.to_string())
    }
}

impl From<String> for CustomError {
    fn from(s: String) -> Self {
        CustomError(s)
    }
}

scraper/src/main.rs
@@ -0,0 +1,65 @@
use std::{collections::HashSet, env};

use anyhow::Result;
use chrono::{Duration, Utc};
use itertools::Itertools as _;
use mensa_upb_scraper::{util, Canteen};
use strum::IntoEnumIterator;

#[tokio::main]
async fn main() -> Result<()> {
    dotenvy::dotenv().ok();

    let db = util::get_db()?;

    tracing_subscriber::fmt::init();

    sqlx::migrate!("../migrations").run(&db).await?;

    tracing::info!("Starting up...");

    let start_date = Utc::now().date_naive();
    let end_date = (Utc::now() + Duration::days(6)).date_naive();

    let already_scraped = sqlx::query!(
        "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2",
        start_date,
        end_date
    )
    .fetch_all(&db)
    .await?
    .into_iter()
    .map(|r| {
        (
            r.date,
            r.canteen.parse::<Canteen>().expect("Invalid db entry"),
        )
    })
    .collect::<HashSet<_>>();

    let filter_canteens = env::var("FILTER_CANTEENS")
        .ok()
        .map(|s| {
            s.split(',')
                .filter_map(|el| el.parse::<Canteen>().ok())
                .collect::<HashSet<_>>()
        })
        .unwrap_or_default();

    let date_canteen_combinations = (0..7)
        .map(|d| (Utc::now() + Duration::days(d)).date_naive())
        .cartesian_product(Canteen::iter())
        .filter(|entry| !filter_canteens.contains(&entry.1) && !already_scraped.contains(entry))
        .collect::<Vec<_>>();
    util::async_for_each(&date_canteen_combinations, |(date, canteen, menu)| {
        let db = db.clone();
        async move {
            util::add_menu_to_db(&db, &date, canteen, menu).await;
        }
    })
    .await;

    tracing::info!("Finished scraping menu");

    Ok(())
}
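
For clarity, cartesian_product pairs every date in the seven-day window with every canteen before the filter drops already-scraped and filtered-out combinations. A small standalone illustration (the values are made up):

    use itertools::Itertools as _;

    fn main() {
        let pairs = (0..2)
            .cartesian_product(["forum", "academica"])
            .collect::<Vec<_>>();
        assert_eq!(
            pairs,
            vec![(0, "forum"), (0, "academica"), (1, "forum"), (1, "academica")]
        );
    }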

scraper/src/menu.rs
@@ -0,0 +1,56 @@
use anyhow::Result;
use chrono::NaiveDate;

use crate::{dish::DishType, Canteen, CustomError, Dish};

#[tracing::instrument]
pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result<Vec<Dish>> {
    tracing::debug!("Starting scraping");

    let url = canteen.get_url();
    let client = reqwest::Client::new();
    let request_builder = client.post(url).query(&[(
        "tx_pamensa_mensa[date]",
        date.format("%Y-%m-%d").to_string(),
    )]);
    let response = request_builder.send().await?;
    let html_content = response.text().await?;

    let document = scraper::Html::parse_document(&html_content);

    let html_main_dishes_selector = scraper::Selector::parse(
        "table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row",
    )
    .map_err(|_| CustomError::from("Failed to parse selector"))?;
    let html_main_dishes = document.select(&html_main_dishes_selector);
    let main_dishes = html_main_dishes
        .filter_map(|dish| Dish::from_element(dish, DishType::Main))
        .collect::<Vec<_>>();

    let html_side_dishes_selector = scraper::Selector::parse(
        "table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row",
    )
    .map_err(|_| CustomError::from("Failed to parse selector"))?;
    let html_side_dishes = document.select(&html_side_dishes_selector);
    let side_dishes = html_side_dishes
        .filter_map(|dish| Dish::from_element(dish, DishType::Side))
        .collect::<Vec<_>>();

    let html_desserts_selector = scraper::Selector::parse(
        "table.table-dishes.soups > tbody > tr.odd > td.description > div.row",
    )
    .map_err(|_| CustomError::from("Failed to parse selector"))?;
    let html_desserts = document.select(&html_desserts_selector);
    let desserts = html_desserts
        .filter_map(|dish| Dish::from_element(dish, DishType::Dessert))
        .collect::<Vec<_>>();

    let mut res = Vec::new();
    res.extend(main_dishes);
    res.extend(side_dishes);
    res.extend(desserts);

    tracing::debug!("Finished scraping");

    Ok(res)
}
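
A hypothetical, network-dependent smoke test (an assumption, not in the commit) that could live at the bottom of this module; running `cargo test -- --ignored` would exercise the live endpoint with the same POST parameter used above:

    #[cfg(test)]
    mod tests {
        use super::*;

        #[tokio::test]
        #[ignore = "hits the live studierendenwerk-pb.de endpoint"]
        async fn scrapes_todays_forum_menu() {
            let today = chrono::Utc::now().date_naive();
            let menu = scrape_menu(&today, Canteen::Forum).await.unwrap();
            // Dish names may be empty for placeholder rows, so only report the count.
            println!("scraped {} dishes", menu.len());
        }
    }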

scraper/src/util.rs
@@ -0,0 +1,64 @@
use std::{env, future::Future};

use anyhow::Result;
use chrono::NaiveDate;
use futures::StreamExt as _;
use num_bigint::BigInt;
use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool};

use crate::{menu::scrape_menu, Canteen, Dish};

pub async fn async_for_each<F, Fut>(date_canteen_combinations: &[(NaiveDate, Canteen)], f: F)
where
    F: FnMut((NaiveDate, Canteen, Vec<Dish>)) -> Fut,
    Fut: Future<Output = ()>,
{
    futures::stream::iter(date_canteen_combinations)
        .then(|(date, canteen)| async move { (*date, *canteen, scrape_menu(date, *canteen).await) })
        .filter_map(|(date, canteen, menu)| async move { menu.ok().map(|menu| (date, canteen, menu)) })
        .for_each(f)
        .await;
}

pub fn get_db() -> Result<PgPool> {
    Ok(PgPoolOptions::new()
        .connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?)
}

#[tracing::instrument(skip(db))]
pub async fn add_meal_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, dish: &Dish) -> Result<()> {
    let vegan = dish.is_vegan();

    sqlx::query!(
        "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)
        VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)
        ON CONFLICT (date,canteen,name) DO NOTHING",
        date, canteen.get_identifier(), dish.get_name(),
        dish.get_type().to_string(), dish.get_image_src(),
        price_to_bigdecimal(dish.get_price_students()),
        price_to_bigdecimal(dish.get_price_employees()),
        price_to_bigdecimal(dish.get_price_guests()),
        vegan, vegan || dish.is_vegetarian()
    ).execute(db).await.inspect_err(|e| {
        tracing::error!("error during database insert: {}", e);
    })?;
tracing::trace!("Insert to DB successfull");

    Ok(())
}

pub async fn add_menu_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, menu: Vec<Dish>) {
    futures::stream::iter(menu)
        .for_each(|dish| async move {
            if !dish.get_name().is_empty() {
                add_meal_to_db(db, date, canteen, &dish).await.ok();
            }
        })
        .await;
}

pub fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal {
    s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok())
        .unwrap_or_else(|| BigDecimal::new(BigInt::from(99999), 2))
}
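
The price_to_bigdecimal helper normalizes German-formatted prices like "2,50 €" into BigDecimal, falling back to a 999.99 sentinel when a price is missing or unparsable. A hypothetical check of that behavior (not in the commit):

    use mensa_upb_scraper::util::price_to_bigdecimal;

    fn main() {
        assert_eq!(price_to_bigdecimal(Some("2,50 €")).to_string(), "2.50");
        // Non-numeric or absent prices both fall back to the sentinel.
        assert_eq!(price_to_bigdecimal(Some("nicht verfügbar")).to_string(), "999.99");
        assert_eq!(price_to_bigdecimal(None).to_string(), "999.99");
    }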

web-api/.dockerignore
@@ -0,0 +1,32 @@
# Include any files or directories that you don't want to be copied to your
# container here (e.g., local build artifacts, temporary files, etc.).
#
# For more help, visit the .dockerignore file reference guide at
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

**/.DS_Store
**/.classpath
**/.dockerignore
**/.env
**/.git
**/.gitignore
**/.project
**/.settings
**/.toolstarget
**/.vs
**/.vscode
**/*.*proj.user
**/*.dbmdl
**/*.jfm
**/charts
**/docker-compose*
**/compose*
**/Dockerfile*
**/node_modules
**/npm-debug.log
**/secrets.dev.yaml
**/values.dev.yaml
/bin
/target
LICENSE
README.md

web-api/Cargo.toml
@@ -0,0 +1,27 @@
[package]
name = "mensa-upb-api"
description = "A web api for a local database of the canteens of the University of Paderborn"
license.workspace = true
authors.workspace = true
repository.workspace = true
readme.workspace = true
version = "0.2.0"
edition = "2021"
publish = false

[dependencies]
actix-cors = "0.7.0"
actix-governor = { version = "0.7.0", features = ["log"] }
actix-web = "4.9.0"
anyhow = { workspace = true }
bigdecimal = { version = "0.4.6", features = ["serde"] }
chrono = { workspace = true, features = ["serde"] }
dotenvy = { workspace = true }
itertools = { workspace = true }
serde = { version = "1.0.215", features = ["derive"] }
serde_json = "1.0.133"
sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] }
strum = { workspace = true, features = ["derive"] }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tracing = "0.1.40"
tracing-subscriber = { workspace = true, features = ["env-filter"] }

web-api/Dockerfile
@@ -0,0 +1,36 @@
FROM rust:latest AS chef
RUN cargo install cargo-chef
WORKDIR /app

FROM chef AS planner
COPY . .
RUN OFFLINE=true cargo chef prepare --bin mensa-upb-api --recipe-path recipe.json

FROM chef AS builder
COPY --from=planner /app/recipe.json recipe.json
RUN cargo chef cook --bin mensa-upb-api --release --recipe-path recipe.json
COPY . .
RUN OFFLINE=true cargo build --bin mensa-upb-api --release

FROM debian:bookworm-slim AS runtime

ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser
USER appuser

COPY --from=builder /app/target/release/mensa-upb-api /bin/mensa-upb-api

ENV API_INTERFACE=0.0.0.0

EXPOSE 8080

# What the container should run when it is started.
CMD ["/bin/mensa-upb-api"]

web-api/compose.yml
@@ -0,0 +1,27 @@
services:
  api:
    build: .
    image: mensa-upb-api:latest
    ports:
      - 8080:8080
    environment:
      - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-api/postgres
      - "RUST_LOG=none,mensa_upb_api=info"
      - TZ=Europe/Berlin
    depends_on:
      - postgres

  postgres:
    container_name: postgres-mensa-upb-api
    image: postgres:17-alpine
    environment:
      - POSTGRES_USER=pguser
      - POSTGRES_PASSWORD=pgpass
      - POSTGRES_DB=postgres
    volumes:
      - db:/var/lib/postgresql/data

volumes:
  db:

web-api/src/main.rs
@@ -27,6 +27,8 @@ async fn main() -> Result<()> {
    let db = PgPoolOptions::new()
        .connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?;

    sqlx::migrate!("../migrations").run(&db).await?;

    let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string());
    let port = env::var("API_PORT")
        .ok()