From 94b1ffead770cbb4fe006e0dd376901681b7fb9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Wed, 20 Nov 2024 21:45:30 +0100 Subject: [PATCH] combine repositories of web api and scraper --- .dockerignore | 34 +- ...8946bed129746b70f92f66704f02093b2e27c.json | 23 + ...905e1957d1d33d823dc82ec92b552f5092028.json | 71 ++ ...5f62d467293454b01175939e32339ee90fd23.json | 29 + Cargo.lock | 715 +++++++++++++++++- Cargo.toml | 34 +- Dockerfile | 68 -- compose.yml | 42 +- dev-compose.yml | 14 + scraper/.dockerignore | 2 + scraper/.gitignore | 2 + scraper/Cargo.toml | 26 + scraper/Dockerfile | 28 + scraper/compose.yml | 23 + scraper/src/canteen.rs | 64 ++ scraper/src/dish.rs | 145 ++++ scraper/src/lib.rs | 32 + scraper/src/main.rs | 65 ++ scraper/src/menu.rs | 56 ++ scraper/src/util.rs | 64 ++ web-api/.dockerignore | 32 + web-api/Cargo.toml | 27 + web-api/Dockerfile | 36 + web-api/compose.yml | 27 + {src => web-api/src}/canteen.rs | 0 {src => web-api/src}/dish.rs | 0 {src => web-api/src}/endpoints/menu.rs | 0 {src => web-api/src}/endpoints/mod.rs | 0 {src => web-api/src}/lib.rs | 0 {src => web-api/src}/main.rs | 2 + {src => web-api/src}/menu.rs | 0 31 files changed, 1528 insertions(+), 133 deletions(-) create mode 100644 .sqlx/query-4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c.json create mode 100644 .sqlx/query-b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028.json create mode 100644 .sqlx/query-b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23.json delete mode 100644 Dockerfile create mode 100644 dev-compose.yml create mode 100644 scraper/.dockerignore create mode 100644 scraper/.gitignore create mode 100644 scraper/Cargo.toml create mode 100644 scraper/Dockerfile create mode 100644 scraper/compose.yml create mode 100644 scraper/src/canteen.rs create mode 100644 scraper/src/dish.rs create mode 100644 scraper/src/lib.rs create mode 100644 scraper/src/main.rs create mode 100644 scraper/src/menu.rs create mode 100644 scraper/src/util.rs create mode 100644 web-api/.dockerignore create mode 100644 web-api/Cargo.toml create mode 100644 web-api/Dockerfile create mode 100644 web-api/compose.yml rename {src => web-api/src}/canteen.rs (100%) rename {src => web-api/src}/dish.rs (100%) rename {src => web-api/src}/endpoints/menu.rs (100%) rename {src => web-api/src}/endpoints/mod.rs (100%) rename {src => web-api/src}/lib.rs (100%) rename {src => web-api/src}/main.rs (97%) rename {src => web-api/src}/menu.rs (100%) diff --git a/.dockerignore b/.dockerignore index 3dfba38..f471ec2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,32 +1,4 @@ -# Include any files or directories that you don't want to be copied to your -# container here (e.g., local build artifacts, temporary files, etc.). -# -# For more help, visit the .dockerignore file reference guide at -# https://docs.docker.com/engine/reference/builder/#dockerignore-file - -**/.DS_Store -**/.classpath -**/.dockerignore -**/.env -**/.git -**/.gitignore -**/.project -**/.settings -**/.toolstarget -**/.vs -**/.vscode -**/*.*proj.user -**/*.dbmdl -**/*.jfm -**/charts -**/docker-compose* -**/compose* -**/Dockerfile* -**/node_modules -**/npm-debug.log -**/secrets.dev.yaml -**/values.dev.yaml -/bin /target -LICENSE -README.md +/dev-compose.yml +.env +.gitignore \ No newline at end of file diff --git a/.sqlx/query-4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c.json b/.sqlx/query-4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c.json new file mode 100644 index 0000000..8928e6e --- /dev/null +++ b/.sqlx/query-4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c.json @@ -0,0 +1,23 @@ +{ + "db_name": "PostgreSQL", + "query": "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian)\n VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)\n ON CONFLICT (date,canteen,name) DO NOTHING", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Date", + "Text", + "Text", + "Text", + "Text", + "Numeric", + "Numeric", + "Numeric", + "Bool", + "Bool" + ] + }, + "nullable": [] + }, + "hash": "4fdb615a3e155d8394c70f25d2d8946bed129746b70f92f66704f02093b2e27c" +} diff --git a/.sqlx/query-b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028.json b/.sqlx/query-b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028.json new file mode 100644 index 0000000..1ec9d01 --- /dev/null +++ b/.sqlx/query-b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028.json @@ -0,0 +1,71 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT name, array_agg(DISTINCT canteen ORDER BY canteen) AS canteens, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian \n FROM meals WHERE date = $1 AND canteen = ANY($2) \n GROUP BY name, dish_type, image_src, price_students, price_employees, price_guests, vegan, vegetarian\n ORDER BY name", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "name", + "type_info": "Text" + }, + { + "ordinal": 1, + "name": "canteens", + "type_info": "TextArray" + }, + { + "ordinal": 2, + "name": "dish_type", + "type_info": "Text" + }, + { + "ordinal": 3, + "name": "image_src", + "type_info": "Text" + }, + { + "ordinal": 4, + "name": "price_students", + "type_info": "Numeric" + }, + { + "ordinal": 5, + "name": "price_employees", + "type_info": "Numeric" + }, + { + "ordinal": 6, + "name": "price_guests", + "type_info": "Numeric" + }, + { + "ordinal": 7, + "name": "vegan", + "type_info": "Bool" + }, + { + "ordinal": 8, + "name": "vegetarian", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [ + "Date", + "TextArray" + ] + }, + "nullable": [ + false, + null, + false, + true, + false, + false, + false, + false, + false + ] + }, + "hash": "b5a990f34095b255672e81562dc905e1957d1d33d823dc82ec92b552f5092028" +} diff --git a/.sqlx/query-b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23.json b/.sqlx/query-b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23.json new file mode 100644 index 0000000..0861ddb --- /dev/null +++ b/.sqlx/query-b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23.json @@ -0,0 +1,29 @@ +{ + "db_name": "PostgreSQL", + "query": "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "date", + "type_info": "Date" + }, + { + "ordinal": 1, + "name": "canteen", + "type_info": "Text" + } + ], + "parameters": { + "Left": [ + "Date", + "Date" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "b94a6b49fb5e53e361da7a890dd5f62d467293454b01175939e32339ee90fd23" +} diff --git a/Cargo.lock b/Cargo.lock index 082182c..8f58710 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,8 +67,8 @@ dependencies = [ "encoding_rs", "flate2", "futures-core", - "h2", - "http", + "h2 0.3.26", + "http 0.2.12", "httparse", "httpdate", "itoa", @@ -104,7 +104,7 @@ checksum = "13d324164c51f63867b57e73ba5936ea151b8a41a1d23d1031eeb9f70d0236f8" dependencies = [ "bytestring", "cfg-if", - "http", + "http 0.2.12", "regex", "regex-lite", "serde", @@ -301,6 +301,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.4.0" @@ -431,6 +437,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "chrono" version = "0.4.38" @@ -461,6 +473,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const_format" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50c655d81ff1114fb0dcdea9225ea9f0cc712a6f8d189378e82bdf62a473a64b" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff1a44b93f47b1bac19a27932f5c591e43d1ba357ee4f61526c8a25603f0eb1" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "convert_case" version = "0.4.0" @@ -542,6 +574,29 @@ dependencies = [ "typenum", ] +[[package]] +name = "cssparser" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "dashmap" version = "6.1.0" @@ -618,6 +673,27 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "dtoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c6ba7d4eec39eaa9ab24d44a0e73a7949a1095a8b3f3abb11eddf27dbb56a53" + [[package]] name = "either" version = "1.13.0" @@ -716,6 +792,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -822,6 +908,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.7" @@ -832,6 +927,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -839,8 +943,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -881,7 +987,26 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", "indexmap", "slab", "tokio", @@ -959,6 +1084,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "html5ever" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e15626aaf9c351bc696217cbe29cb9b5e86c43f8a46b5e2f5c6c5cf7cb904ce" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.12" @@ -970,6 +1109,40 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body", + "pin-project-lite", +] + [[package]] name = "httparse" version = "1.9.5" @@ -982,6 +1155,63 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "hyper" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.7", + "http 1.1.0", + "http-body", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper", + "hyper-util", + "rustls", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", + "webpki-roots", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body", + "hyper", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.61" @@ -1160,6 +1390,12 @@ dependencies = [ "hashbrown 0.15.1", ] +[[package]] +name = "ipnet" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" + [[package]] name = "itertools" version = "0.13.0" @@ -1171,9 +1407,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a73e9fe3c49d7afb2ace819fa181a287ce54a0983eda4e0eb05c22f82ffe534" +checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" [[package]] name = "jobserver" @@ -1276,6 +1512,26 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c88c6129bd24319e62a0359cb6b958fa7e8be6e19bb1663bc396b90883aca5" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + [[package]] name = "matchers" version = "0.1.0" @@ -1322,6 +1578,26 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "mensa-upb-scraper" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "const_format", + "dotenvy", + "futures", + "itertools", + "num-bigint", + "reqwest", + "scraper", + "sqlx", + "strum", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "mime" version = "0.3.17" @@ -1356,6 +1632,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "no-std-compat" version = "0.4.1" @@ -1522,6 +1804,77 @@ version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.15" @@ -1582,6 +1935,12 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.89" @@ -1606,6 +1965,58 @@ dependencies = [ "winapi", ] +[[package]] +name = "quinn" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" +dependencies = [ + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls", + "socket2", + "thiserror 2.0.3", + "tokio", + "tracing", +] + +[[package]] +name = "quinn-proto" +version = "0.11.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +dependencies = [ + "bytes", + "getrandom", + "rand", + "ring", + "rustc-hash", + "rustls", + "rustls-pki-types", + "slab", + "thiserror 2.0.3", + "tinyvec", + "tracing", + "web-time", +] + +[[package]] +name = "quinn-udp" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.59.0", +] + [[package]] name = "quote" version = "1.0.37" @@ -1713,6 +2124,50 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "reqwest" +version = "0.12.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.4.7", + "http 1.1.0", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pemfile", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "windows-registry", +] + [[package]] name = "ring" version = "0.17.8" @@ -1754,6 +2209,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustc_version" version = "0.4.1" @@ -1804,6 +2265,9 @@ name = "rustls-pki-types" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +dependencies = [ + "web-time", +] [[package]] name = "rustls-webpki" @@ -1834,6 +2298,41 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0e749d29b2064585327af5038a5a8eb73aeebad4a3472e83531a436563f7208" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "semver" version = "1.0.23" @@ -1884,6 +2383,15 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "sha1" version = "0.10.6" @@ -1940,6 +2448,12 @@ dependencies = [ "rand_core", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2055,7 +2569,7 @@ dependencies = [ "sha2", "smallvec", "sqlformat", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tracing", @@ -2142,7 +2656,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "tracing", "uuid", "whoami", @@ -2184,7 +2698,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "tracing", "uuid", "whoami", @@ -2221,6 +2735,32 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "string_cache" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" +dependencies = [ + "new_debug_unreachable", + "once_cell", + "parking_lot", + "phf_shared 0.10.0", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", +] + [[package]] name = "stringprep" version = "0.1.5" @@ -2271,6 +2811,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.1" @@ -2295,13 +2844,33 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] @@ -2315,6 +2884,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -2410,6 +2990,17 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.16" @@ -2434,6 +3025,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + [[package]] name = "tracing" version = "0.1.40" @@ -2478,6 +3075,16 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + [[package]] name = "tracing-subscriber" version = "0.3.18" @@ -2488,14 +3095,23 @@ dependencies = [ "nu-ansi-term", "once_cell", "regex", + "serde", + "serde_json", "sharded-slab", "smallvec", "thread_local", "tracing", "tracing-core", "tracing-log", + "tracing-serde", ] +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + [[package]] name = "typenum" version = "1.17.0" @@ -2529,6 +3145,18 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unicode_categories" version = "0.1.1" @@ -2552,6 +3180,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -2588,6 +3222,15 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2626,6 +3269,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.95" @@ -2665,6 +3320,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webpki-roots" version = "0.26.6" @@ -2715,6 +3380,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 4be19a2..ef3140e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,25 @@ -[package] -name = "mensa-upb-api" -description = "A web scraper api for the canteens of the University of Paderborn" + + +[workspace] +members = [ + "scraper", + "web-api", +] +resolver = "2" + +[workspace.package] license = "MIT" authors = ["Moritz Hölting"] repository = "https://github.com/moritz-hoelting/mensa-upb-api" -publish = false readme = "README.md" -version = "0.2.0" -edition = "2021" -[dependencies] -actix-cors = "0.7.0" -actix-governor = { version = "0.7.0", features = ["log"] } -actix-web = "4.9.0" +[workspace.dependencies] anyhow = "1.0.93" -bigdecimal = { version = "0.4.6", features = ["serde"] } -chrono = { version = "0.4.38", features = ["serde"] } +chrono = "0.4.38" dotenvy = "0.15.7" itertools = "0.13.0" -serde = { version = "1.0.215", features = ["derive"] } -serde_json = "1.0.133" -strum = { version = "0.26.3", features = ["derive"] } -sqlx = { version = "0.8.2", features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] } -tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] } +sqlx = "0.8.2" +strum = "0.26.3" +tokio = "1.41.1" tracing = "0.1.40" -tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +tracing-subscriber = "0.3.18" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1e805f6..0000000 --- a/Dockerfile +++ /dev/null @@ -1,68 +0,0 @@ -# syntax=docker/dockerfile:1 - - -################################################################################ -# Create a stage for building the application. - -ARG RUST_VERSION=1.79.0 -ARG APP_NAME=mensa-upb-api -FROM rust:${RUST_VERSION}-slim-bullseye AS build -ARG APP_NAME -WORKDIR /app - -RUN apt-get update -y && \ - apt-get install -y pkg-config make g++ libssl-dev - -# Build the application. -# Leverage a cache mount to /usr/local/cargo/registry/ -# for downloaded dependencies and a cache mount to /app/target/ for -# compiled dependencies which will speed up subsequent builds. -# Leverage a bind mount to the src directory to avoid having to copy the -# source code into the container. Once built, copy the executable to an -# output directory before the cache mounted /app/target is unmounted. -RUN --mount=type=bind,source=src,target=src \ - --mount=type=bind,source=Cargo.toml,target=Cargo.toml \ - --mount=type=bind,source=Cargo.lock,target=Cargo.lock \ - --mount=type=cache,target=/app/target/ \ - --mount=type=cache,target=/usr/local/cargo/registry/ \ - <> /var/log/cron.log 2>&1" > /etc/cron.d/mensa_upb_scraper +RUN chmod 0644 /etc/cron.d/mensa_upb_scraper +RUN crontab /etc/cron.d/mensa_upb_scraper +RUN touch /var/log/cron.log + +COPY --from=builder /app/target/release/mensa-upb-scraper /app/mensa-upb-scraper + +CMD env > /etc/environment && cron && tail -f /var/log/cron.log \ No newline at end of file diff --git a/scraper/compose.yml b/scraper/compose.yml new file mode 100644 index 0000000..711effb --- /dev/null +++ b/scraper/compose.yml @@ -0,0 +1,23 @@ +services: + scraper: + build: . + image: mensa-upb-scraper:latest + environment: + - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-scraper/postgres + - "RUST_LOG=none,mensa_upb_scraper=info" + - TZ=Europe/Berlin + depends_on: + - postgres + + postgres: + container_name: postgres-mensa-upb-scraper + image: postgres:17-alpine + environment: + - POSTGRES_USER=pguser + - POSTGRES_PASSWORD=pgpass + - POSTGRES_DB=postgres + volumes: + - db:/var/lib/postgresql/data + +volumes: + db: diff --git a/scraper/src/canteen.rs b/scraper/src/canteen.rs new file mode 100644 index 0000000..15b2900 --- /dev/null +++ b/scraper/src/canteen.rs @@ -0,0 +1,64 @@ +use std::str::FromStr; + +use const_format::concatcp; +use strum::EnumIter; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, EnumIter, Hash)] +pub enum Canteen { + Forum, + Academica, + Picknick, + BonaVista, + GrillCafe, + ZM2, + Basilica, + Atrium, +} + +const POST_URL_BASE: &str = "https://www.studierendenwerk-pb.de/gastronomie/speiseplaene/"; + +impl Canteen { + pub fn get_url(&self) -> &str { + match self { + Self::Forum => concatcp!(POST_URL_BASE, "forum/"), + Self::Academica => concatcp!(POST_URL_BASE, "mensa-academica/"), + Self::Picknick => concatcp!(POST_URL_BASE, "picknick/"), + Self::BonaVista => concatcp!(POST_URL_BASE, "bona-vista/"), + Self::GrillCafe => concatcp!(POST_URL_BASE, "grillcafe/"), + Self::ZM2 => concatcp!(POST_URL_BASE, "mensa-zm2/"), + Self::Basilica => concatcp!(POST_URL_BASE, "mensa-basilica-hamm/"), + Self::Atrium => concatcp!(POST_URL_BASE, "mensa-atrium-lippstadt/"), + } + } + + pub fn get_identifier(&self) -> &str { + match self { + Self::Forum => "forum", + Self::Academica => "academica", + Self::Picknick => "picknick", + Self::BonaVista => "bona-vista", + Self::GrillCafe => "grillcafe", + Self::ZM2 => "zm2", + Self::Basilica => "basilica", + Self::Atrium => "atrium", + } + } +} + +impl FromStr for Canteen { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "forum" => Ok(Self::Forum), + "academica" => Ok(Self::Academica), + "picknick" => Ok(Self::Picknick), + "bona-vista" => Ok(Self::BonaVista), + "grillcafe" => Ok(Self::GrillCafe), + "zm2" => Ok(Self::ZM2), + "basilica" => Ok(Self::Basilica), + "atrium" => Ok(Self::Atrium), + invalid => Err(format!("Invalid canteen identifier: {}", invalid)), + } + } +} diff --git a/scraper/src/dish.rs b/scraper/src/dish.rs new file mode 100644 index 0000000..7776db7 --- /dev/null +++ b/scraper/src/dish.rs @@ -0,0 +1,145 @@ +use std::fmt::Display; + +use itertools::Itertools; +use scraper::ElementRef; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Dish { + name: String, + image_src: Option, + price_students: Option, + price_employees: Option, + price_guests: Option, + extras: Vec, + dish_type: DishType, +} + +impl Dish { + pub fn get_name(&self) -> &str { + &self.name + } + pub fn get_price_students(&self) -> Option<&str> { + self.price_students.as_deref() + } + pub fn get_price_employees(&self) -> Option<&str> { + self.price_employees.as_deref() + } + pub fn get_price_guests(&self) -> Option<&str> { + self.price_guests.as_deref() + } + pub fn get_image_src(&self) -> Option<&str> { + self.image_src.as_deref() + } + pub fn is_vegan(&self) -> bool { + self.extras.contains(&"vegan".to_string()) + } + pub fn is_vegetarian(&self) -> bool { + self.extras.contains(&"vegetarian".to_string()) + } + pub fn get_extras(&self) -> &[String] { + &self.extras + } + pub fn get_type(&self) -> DishType { + self.dish_type + } + + pub fn same_as(&self, other: &Self) -> bool { + self.name == other.name + && self.price_employees == other.price_employees + && self.price_guests == other.price_guests + && self.price_students == other.price_students + && self.extras.iter().sorted().collect_vec() + == self.extras.iter().sorted().collect_vec() + } + + pub fn from_element(element: ElementRef, dish_type: DishType) -> Option { + let html_name_selector = scraper::Selector::parse(".desc h4").ok()?; + let name = element + .select(&html_name_selector) + .next()? + .text() + .collect::>() + .join("") + .trim() + .to_string(); + + let img_selector = scraper::Selector::parse(".img img").ok()?; + let img_src = element.select(&img_selector).next().and_then(|el| { + el.value() + .attr("src") + .map(|img_src_path| format!("https://www.studierendenwerk-pb.de/{}", img_src_path)) + }); + + let html_price_selector = scraper::Selector::parse(".desc .price").ok()?; + let mut prices = element + .select(&html_price_selector) + .filter_map(|price| { + let price_for = price.first_child().and_then(|strong| { + strong.first_child().and_then(|text_element| { + text_element + .value() + .as_text() + .map(|text| text.trim().trim_end_matches(':').to_string()) + }) + }); + let price_value = price.last_child().and_then(|text_element| { + text_element + .value() + .as_text() + .map(|text| text.trim().to_string()) + }); + price_for + .and_then(|price_for| price_value.map(|price_value| (price_for, price_value))) + }) + .collect::>(); + + let html_extras_selector = scraper::Selector::parse(".desc .buttons > *").ok()?; + let extras = element + .select(&html_extras_selector) + .filter_map(|extra| extra.value().attr("title").map(|title| title.to_string())) + .collect::>(); + + Some(Self { + name, + image_src: img_src, + price_students: prices + .iter_mut() + .find(|(price_for, _)| price_for == "Studierende") + .map(|(_, price)| std::mem::take(price)), + price_employees: prices + .iter_mut() + .find(|(price_for, _)| price_for == "Bedienstete") + .map(|(_, price)| std::mem::take(price)), + price_guests: prices + .iter_mut() + .find(|(price_for, _)| price_for == "Gäste") + .map(|(_, price)| std::mem::take(price)), + extras, + dish_type, + }) + } +} + +impl PartialOrd for Dish { + fn partial_cmp(&self, other: &Self) -> Option { + self.name.partial_cmp(&other.name) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DishType { + Main, + Side, + Dessert, +} + +impl Display for DishType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match self { + Self::Main => "main", + Self::Side => "side", + Self::Dessert => "dessert", + }; + f.write_str(s) + } +} diff --git a/scraper/src/lib.rs b/scraper/src/lib.rs new file mode 100644 index 0000000..949c0a8 --- /dev/null +++ b/scraper/src/lib.rs @@ -0,0 +1,32 @@ +mod canteen; +mod dish; +mod menu; +pub mod util; + +use std::{error::Error, fmt::Display}; + +pub use canteen::Canteen; +pub use dish::Dish; + +#[derive(Debug, Clone)] +struct CustomError(String); + +impl Display for CustomError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Error for CustomError {} + +impl From<&str> for CustomError { + fn from(s: &str) -> Self { + CustomError(s.to_string()) + } +} + +impl From for CustomError { + fn from(s: String) -> Self { + CustomError(s) + } +} diff --git a/scraper/src/main.rs b/scraper/src/main.rs new file mode 100644 index 0000000..64201bc --- /dev/null +++ b/scraper/src/main.rs @@ -0,0 +1,65 @@ +use std::{collections::HashSet, env}; + +use anyhow::Result; +use chrono::{Duration, Utc}; +use itertools::Itertools as _; +use mensa_upb_scraper::{util, Canteen}; +use strum::IntoEnumIterator; + +#[tokio::main] +async fn main() -> Result<()> { + dotenvy::dotenv().ok(); + + let db = util::get_db()?; + + tracing_subscriber::fmt::init(); + + sqlx::migrate!("../migrations").run(&db).await?; + + tracing::info!("Starting up..."); + + let start_date = Utc::now().date_naive(); + let end_date = (Utc::now() + Duration::days(6)).date_naive(); + + let already_scraped = sqlx::query!( + "SELECT DISTINCT date, canteen FROM MEALS WHERE date >= $1 AND date <= $2", + start_date, + end_date + ) + .fetch_all(&db) + .await? + .into_iter() + .map(|r| { + ( + r.date, + r.canteen.parse::().expect("Invalid db entry"), + ) + }) + .collect::>(); + + let filter_canteens = env::var("FILTER_CANTEENS") + .ok() + .map(|s| { + s.split(',') + .filter_map(|el| el.parse::().ok()) + .collect::>() + }) + .unwrap_or_default(); + + let date_canteen_combinations = (0..7) + .map(|d| (Utc::now() + Duration::days(d)).date_naive()) + .cartesian_product(Canteen::iter()) + .filter(|entry| !filter_canteens.contains(&entry.1) && !already_scraped.contains(entry)) + .collect::>(); + util::async_for_each(&date_canteen_combinations, |(date, canteen, menu)| { + let db = db.clone(); + async move { + util::add_menu_to_db(&db, &date, canteen, menu).await; + } + }) + .await; + + tracing::info!("Finished scraping menu"); + + Ok(()) +} diff --git a/scraper/src/menu.rs b/scraper/src/menu.rs new file mode 100644 index 0000000..64e0cf6 --- /dev/null +++ b/scraper/src/menu.rs @@ -0,0 +1,56 @@ +use anyhow::Result; +use chrono::NaiveDate; + +use crate::{dish::DishType, Canteen, CustomError, Dish}; + +#[tracing::instrument] +pub async fn scrape_menu(date: &NaiveDate, canteen: Canteen) -> Result> { + tracing::debug!("Starting scraping"); + + let url = canteen.get_url(); + let client = reqwest::Client::new(); + let request_builder = client.post(url).query(&[( + "tx_pamensa_mensa[date]", + date.format("%Y-%m-%d").to_string(), + )]); + let response = request_builder.send().await?; + let html_content = response.text().await?; + + let document = scraper::Html::parse_document(&html_content); + + let html_main_dishes_selector = scraper::Selector::parse( + "table.table-dishes.main-dishes > tbody > tr.odd > td.description > div.row", + ) + .map_err(|_| CustomError::from("Failed to parse selector"))?; + let html_main_dishes = document.select(&html_main_dishes_selector); + let main_dishes = html_main_dishes + .filter_map(|dish| Dish::from_element(dish, DishType::Main)) + .collect::>(); + + let html_side_dishes_selector = scraper::Selector::parse( + "table.table-dishes.side-dishes > tbody > tr.odd > td.description > div.row", + ) + .map_err(|_| CustomError::from("Failed to parse selector"))?; + let html_side_dishes = document.select(&html_side_dishes_selector); + let side_dishes = html_side_dishes + .filter_map(|dish| Dish::from_element(dish, DishType::Side)) + .collect::>(); + + let html_desserts_selector = scraper::Selector::parse( + "table.table-dishes.soups > tbody > tr.odd > td.description > div.row", + ) + .map_err(|_| CustomError::from("Failed to parse selector"))?; + let html_desserts = document.select(&html_desserts_selector); + let desserts = html_desserts + .filter_map(|dish| Dish::from_element(dish, DishType::Dessert)) + .collect::>(); + + let mut res = Vec::new(); + res.extend(main_dishes); + res.extend(side_dishes); + res.extend(desserts); + + tracing::debug!("Finished scraping"); + + Ok(res) +} diff --git a/scraper/src/util.rs b/scraper/src/util.rs new file mode 100644 index 0000000..b742ce9 --- /dev/null +++ b/scraper/src/util.rs @@ -0,0 +1,64 @@ +use std::{env, future::Future}; + +use anyhow::Result; +use chrono::NaiveDate; +use futures::StreamExt as _; +use num_bigint::BigInt; +use sqlx::{postgres::PgPoolOptions, types::BigDecimal, PgPool}; + +use crate::{menu::scrape_menu, Canteen, Dish}; + +pub async fn async_for_each(date_canteen_combinations: &[(NaiveDate, Canteen)], f: F) +where + F: FnMut((NaiveDate, Canteen, Vec)) -> Fut, + Fut: Future, +{ + futures::stream::iter(date_canteen_combinations) + .then(|(date, canteen)| async move { (*date, *canteen, scrape_menu(date, *canteen).await) }) + .filter_map(|(date, canteen, menu)| async move { menu.ok().map(|menu| (date, canteen, menu)) }) + .for_each(f) + .await; +} + +pub fn get_db() -> Result { + Ok(PgPoolOptions::new() + .connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?) +} + +#[tracing::instrument(skip(db))] +pub async fn add_meal_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, dish: &Dish) -> Result<()> { + let vegan = dish.is_vegan(); + + sqlx::query!( + "INSERT INTO meals (date,canteen,name,dish_type,image_src,price_students,price_employees,price_guests,vegan,vegetarian) + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10) + ON CONFLICT (date,canteen,name) DO NOTHING", + date, canteen.get_identifier(), dish.get_name(), + dish.get_type().to_string(), dish.get_image_src(), + price_to_bigdecimal(dish.get_price_students()), + price_to_bigdecimal(dish.get_price_employees()), + price_to_bigdecimal(dish.get_price_guests()), + vegan, vegan || dish.is_vegetarian() + ).execute(db).await.inspect_err(|e| { + tracing::error!("error during database insert: {}", e); + })?; + + tracing::trace!("Insert to DB successfull"); + + Ok(()) +} + +pub async fn add_menu_to_db(db: &PgPool, date: &NaiveDate, canteen: Canteen, menu: Vec) { + futures::stream::iter(menu) + .for_each(|dish| async move { + if !dish.get_name().is_empty() { + add_meal_to_db(db, date, canteen, &dish).await.ok(); + } + }) + .await; +} + +pub fn price_to_bigdecimal(s: Option<&str>) -> BigDecimal { + s.and_then(|p| p.trim_end_matches(" €").replace(',', ".").parse().ok()) + .unwrap_or_else(|| BigDecimal::new(BigInt::from(99999), 2)) +} diff --git a/web-api/.dockerignore b/web-api/.dockerignore new file mode 100644 index 0000000..3dfba38 --- /dev/null +++ b/web-api/.dockerignore @@ -0,0 +1,32 @@ +# Include any files or directories that you don't want to be copied to your +# container here (e.g., local build artifacts, temporary files, etc.). +# +# For more help, visit the .dockerignore file reference guide at +# https://docs.docker.com/engine/reference/builder/#dockerignore-file + +**/.DS_Store +**/.classpath +**/.dockerignore +**/.env +**/.git +**/.gitignore +**/.project +**/.settings +**/.toolstarget +**/.vs +**/.vscode +**/*.*proj.user +**/*.dbmdl +**/*.jfm +**/charts +**/docker-compose* +**/compose* +**/Dockerfile* +**/node_modules +**/npm-debug.log +**/secrets.dev.yaml +**/values.dev.yaml +/bin +/target +LICENSE +README.md diff --git a/web-api/Cargo.toml b/web-api/Cargo.toml new file mode 100644 index 0000000..7403c28 --- /dev/null +++ b/web-api/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "mensa-upb-api" +description = "A web api for a local database of the canteens of the University of Paderborn" +license.workspace = true +authors.workspace = true +repository.workspace = true +readme.workspace = true +version = "0.2.0" +edition = "2021" +publish = false + +[dependencies] +actix-cors = "0.7.0" +actix-governor = { version = "0.7.0", features = ["log"] } +actix-web = "4.9.0" +anyhow = { workspace = true } +bigdecimal = { version = "0.4.6", features = ["serde"] } +chrono = { workspace = true, features = ["serde"] } +dotenvy = { workspace = true } +itertools = { workspace = true } +serde = { version = "1.0.215", features = ["derive"] } +serde_json = "1.0.133" +sqlx = { workspace = true, features = ["runtime-tokio-rustls", "postgres", "migrate", "chrono", "uuid", "bigdecimal"] } +strum = { workspace = true, features = ["derive"] } +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } +tracing = "0.1.40" +tracing-subscriber = { workspace = true, features = ["env-filter"] } diff --git a/web-api/Dockerfile b/web-api/Dockerfile new file mode 100644 index 0000000..561d261 --- /dev/null +++ b/web-api/Dockerfile @@ -0,0 +1,36 @@ + +FROM rust:latest AS chef +RUN cargo install cargo-chef +WORKDIR /app + +FROM chef AS planner +COPY . . +RUN OFFLINE=true cargo chef prepare --bin mensa-upb-api --recipe-path recipe.json + +FROM chef AS builder +COPY --from=planner /app/recipe.json recipe.json +RUN cargo chef cook --bin mensa-upb-api --release --recipe-path recipe.json +COPY . . +RUN OFFLINE=true cargo build --bin mensa-upb-api --release + +FROM debian:bookworm-slim AS runtime + +ARG UID=10001 +RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + appuser +USER appuser + +COPY --from=builder /app/target/release/mensa-upb-api /bin/mensa-upb-api + +ENV API_INTERFACE=0.0.0.0 + +EXPOSE 8080 + +# What the container should run when it is started. +CMD ["/bin/mensa-upb-api"] \ No newline at end of file diff --git a/web-api/compose.yml b/web-api/compose.yml new file mode 100644 index 0000000..f746639 --- /dev/null +++ b/web-api/compose.yml @@ -0,0 +1,27 @@ +services: + api: + build: . + image: mensa-upb-api:latest + ports: + - 8080:8080 + environment: + - DATABASE_URL=postgres://pguser:pgpass@postgres-mensa-upb-api/postgres + - "RUST_LOG=none,mensa_upb_api=info" + - TZ=Europe/Berlin + depends_on: + - postgres + + postgres: + container_name: postgres-mensa-upb-api + image: postgres:17-alpine + environment: + - POSTGRES_USER=pguser + - POSTGRES_PASSWORD=pgpass + - POSTGRES_DB=postgres + volumes: + - db:/var/lib/postgresql/data + +volumes: + db: + + diff --git a/src/canteen.rs b/web-api/src/canteen.rs similarity index 100% rename from src/canteen.rs rename to web-api/src/canteen.rs diff --git a/src/dish.rs b/web-api/src/dish.rs similarity index 100% rename from src/dish.rs rename to web-api/src/dish.rs diff --git a/src/endpoints/menu.rs b/web-api/src/endpoints/menu.rs similarity index 100% rename from src/endpoints/menu.rs rename to web-api/src/endpoints/menu.rs diff --git a/src/endpoints/mod.rs b/web-api/src/endpoints/mod.rs similarity index 100% rename from src/endpoints/mod.rs rename to web-api/src/endpoints/mod.rs diff --git a/src/lib.rs b/web-api/src/lib.rs similarity index 100% rename from src/lib.rs rename to web-api/src/lib.rs diff --git a/src/main.rs b/web-api/src/main.rs similarity index 97% rename from src/main.rs rename to web-api/src/main.rs index bb7b873..ddbab76 100644 --- a/src/main.rs +++ b/web-api/src/main.rs @@ -27,6 +27,8 @@ async fn main() -> Result<()> { let db = PgPoolOptions::new() .connect_lazy(&env::var("DATABASE_URL").expect("missing DATABASE_URL env variable"))?; + sqlx::migrate!("../migrations").run(&db).await?; + let interface = env::var("API_INTERFACE").unwrap_or("127.0.0.1".to_string()); let port = env::var("API_PORT") .ok() diff --git a/src/menu.rs b/web-api/src/menu.rs similarity index 100% rename from src/menu.rs rename to web-api/src/menu.rs