Initial commit
This commit is contained in:
commit
8f112c1bb5
69 changed files with 2068 additions and 0 deletions
179
bot/.dockerignore
Normal file
179
bot/.dockerignore
Normal file
|
@ -0,0 +1,179 @@
|
|||
# Created by https://www.toptal.com/developers/gitignore/api/python
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=python
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python
|
||||
|
||||
# Ignore Telethon session files
|
||||
*.session
|
13
bot/Dockerfile
Normal file
13
bot/Dockerfile
Normal file
|
@ -0,0 +1,13 @@
|
|||
FROM python:3.11-alpine

# Do not write .pyc files and do not buffer stdout/stderr, so that the bot's
# print() output shows up immediately in the container logs.
# (key=value form: the space-separated `ENV key value` syntax is legacy.)
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Install the dependencies first so this layer stays cached while only the
# application code changes. The requirements file is removed in the same RUN
# step: deleting it in a separate layer would not make the image any smaller.
COPY requirements.txt /
RUN pip3 install --no-cache-dir -r /requirements.txt \
    && rm /requirements.txt

COPY . /app
WORKDIR /app

CMD ["python3", "bot_listen.py"]
|
29
bot/README.md
Normal file
29
bot/README.md
Normal file
|
@ -0,0 +1,29 @@
|
|||
# caFICtería-aaS (bot)
|
||||
|
||||
This is the bot that actually scrapes the menus from Telegram and puts them in a JSON file.
|
||||
|
||||
## ⚙️ Usage
|
||||
|
||||
- `get_history.py` will download all messages on the channel and parse them (2018-now). Should be run when you install everything (I guess you can skip this if you don't love data 😔).
|
||||
- This will also include the menus sent in photos during a few days in July 2022 (manually parsed in `menu_photos_history.json`).
|
||||
- `bot_listen.py` will listen for new messages on the channel, and parse them when received. This is what should be running to fetch the messages received after the initial run of `get_history.py`.
|
||||
|
||||
## 🔧 Environment Variables
|
||||
|
||||
| Name | Required | Description |
|
||||
|--------------------------|----------|-------------|
|
||||
| `TG_API_ID` | YES | Your Telegram API ID |
|
||||
| `TG_API_HASH` | YES | Your Telegram API hash |
|
||||
| `TG_SESSION_NAME` | NO | The telethon session name/path _(Default: "default")_ |
|
||||
| `TG_CHANNEL_NAME` | NO | The channel where the menu info is stored _(Default: "CafeteriaFIC")_ |
|
||||
| `MENU_HISTORY_FILE_PATH` | NO | The place where the JSON file with the menu history will be stored. _(Default: "/tmp/menu_history.json")_ |
|
||||
|
||||
_**Tip💡:**_ See the [Telethon documentation](https://docs.telethon.dev/en/stable/basic/signing-in.html#signing-in) for details about the Telegram authentication process.
|
||||
|
||||
## 🐳 Building the Docker image
|
||||
|
||||
```bash
|
||||
git clone https://git.peprolinbot.com/peprolinbot/caFICteria-aaS.git
|
||||
cd caFICteria-aaS/bot
|
||||
docker build -t caficteria-bot .
|
||||
```
|
32
bot/bot_listen.py
Normal file
32
bot/bot_listen.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
from telethon import TelegramClient, events
|
||||
import config
|
||||
from utils import parse_menu_message, InvalidMenuMessageError
|
||||
|
||||
|
||||
client = TelegramClient(config.session_name, config.api_id, config.api_hash)
|
||||
|
||||
|
||||
@client.on(events.NewMessage(chats=config.channel_name))
async def handler(event):
    """
    Parse a newly received channel message and store it in the menu history file.

    Messages without text, messages containing "!" and messages that
    `parse_menu_message` rejects are ignored. Otherwise the entry for the
    message's date (formatted as "%d-%m-%Y") is created or overwritten in the
    JSON file at `config.menu_history_file_path`.

    :param event: The `events.NewMessage` event delivered by the client.
    """
    # Local import: this module does not import json at the top level.
    import json

    msg = event.message
    text = msg.message

    # Same filter as get_history.py: skip messages without text content and
    # info messages (these usually include the ! symbol).
    if text is None or "!" in text:
        return

    try:
        courses = parse_menu_message(text)
    except InvalidMenuMessageError as e:
        print(e)
        return

    # Read the current history. The file may not exist yet when the initial
    # get_history.py run was skipped, so start from an empty history then.
    try:
        with open(config.menu_history_file_path, "r", encoding='utf-8') as f:
            menus = json.load(f)
    except FileNotFoundError:
        menus = {}

    menus[msg.date.strftime("%d-%m-%Y")] = {"courses": courses,
                                            "message": text}

    # Rewrite the whole file ("w" truncates it) so no stale bytes from the
    # previous content are left behind.
    with open(config.menu_history_file_path, "w", encoding='utf-8') as f:
        json.dump(menus, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
async def main():
    """Start the module-level Telegram client and wait until it disconnects."""
    await client.start()
    await client.run_until_disconnected()


if __name__ == '__main__':
    # Script entry point: run the client inside a fresh asyncio event loop.
    import asyncio

    asyncio.run(main())
|
9
bot/config.py
Normal file
9
bot/config.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
from os import getenv
|
||||
|
||||
# Telegram API credentials; there is no default, so both are None when unset.
api_id = getenv("TG_API_ID")
api_hash = getenv("TG_API_HASH")

# Telethon session name/path and the channel the menus are read from.
session_name = getenv("TG_SESSION_NAME", "default")
channel_name = getenv("TG_CHANNEL_NAME", "CafeteriaFIC")

# Location of the JSON file holding the menu history.
menu_history_file_path = getenv("MENU_HISTORY_FILE_PATH", "/tmp/menu_history.json")
|
48
bot/get_history.py
Normal file
48
bot/get_history.py
Normal file
|
@ -0,0 +1,48 @@
|
|||
from telethon import TelegramClient
|
||||
import asyncio
|
||||
import json
|
||||
|
||||
from utils import parse_menu_message, InvalidMenuMessageError
|
||||
import config
|
||||
|
||||
|
||||
async def get_message_history(chat_name, limit=None):
    """
    Fetch the messages of a chat using a short-lived Telegram client.

    :param chat_name: Identifier of the chat, resolved with `client.get_entity`.
    :param limit: Forwarded unchanged as the `limit` argument of `client.get_messages`.

    :returns: Whatever `client.get_messages` returns for the resolved chat.
    """
    telegram = TelegramClient(config.session_name, config.api_id, config.api_hash)

    async with telegram as client:
        entity = await client.get_entity(chat_name)
        return await client.get_messages(entity=entity, limit=limit)
|
||||
|
||||
|
||||
def parse_message_history(chat_name, limit=None):
    """
    Download the messages of a chat and parse every menu found in them.

    :param chat_name: Identifier of the chat, forwarded to `get_message_history`.
    :param limit: Maximum number of messages to download, forwarded to `get_message_history`.

    :returns: A dict keyed by the message date formatted as "%d-%m-%Y". Each value is
        `{"courses": <result of parse_menu_message>, "message": <message text>}`.
    """
    messages = asyncio.run(get_message_history(chat_name=chat_name, limit=limit))

    menus = {}
    for msg in messages:
        text = msg.message

        # Ignore messages without text content and info messages (these usually include the ! symbol)
        if text is None or "!" in text:
            continue

        try:
            courses = parse_menu_message(text)
        except InvalidMenuMessageError as e:
            print(e)
            continue

        date_key = msg.date.strftime("%d-%m-%Y")

        # get_message_history does not request reversed order, and Telethon's
        # documented default is newest-to-oldest. Keeping only the first valid
        # menu seen per day therefore keeps the most recent one, matching
        # bot_listen.py where a newer message replaces that day's entry.
        if date_key not in menus:
            menus[date_key] = {"courses": courses,
                               "message": text}

    return menus
|
||||
|
||||
|
||||
if __name__ == "__main__":
    menus = parse_message_history(config.channel_name)

    # We manually include menus in photos from July 2022.
    # The file contains accented characters, so it is read explicitly as UTF-8
    # instead of relying on the platform's default encoding.
    # NOTE(review): entries in menu_photos_history.json are bare lists of
    # courses, while parsed entries are {"courses": ..., "message": ...} dicts
    # - confirm that whatever reads the history file handles both shapes.
    with open("menu_photos_history.json", 'r', encoding='utf-8') as file:
        data = json.load(file)
        menus.update(data)

    with open(config.menu_history_file_path, "w", encoding='utf-8') as f:
        json.dump(menus, f, ensure_ascii=False, indent=4)
|
98
bot/menu_photos_history.json
Normal file
98
bot/menu_photos_history.json
Normal file
|
@ -0,0 +1,98 @@
|
|||
{
|
||||
"14-07-2022": [
|
||||
[
|
||||
"Espaguetis boloñesa",
|
||||
"Ensaladilla de gambas",
|
||||
"Ensalada mixta"
|
||||
],
|
||||
[
|
||||
"Lirios fritos",
|
||||
"Codillo asado"
|
||||
]
|
||||
],
|
||||
"15-07-2022": [
|
||||
[
|
||||
"Tortilla",
|
||||
"Pastel de york y queso",
|
||||
"Espaguetis con salsa de tomate y atún",
|
||||
"Gazpacho"
|
||||
],
|
||||
[
|
||||
"Varitas de merluza",
|
||||
"Merluza a la plancha",
|
||||
"Raxo de cerdo"
|
||||
]
|
||||
],
|
||||
"18-07-2022": [
|
||||
[
|
||||
"Ensalada de pasta",
|
||||
"Croquetas"
|
||||
],
|
||||
[
|
||||
"Lasaña",
|
||||
"Filete de caballa a la plancha"
|
||||
]
|
||||
],
|
||||
"19-07-2022": [
|
||||
[
|
||||
"Champiñones con jamón",
|
||||
"Raviolis con salsa de tomate casera"
|
||||
],
|
||||
[
|
||||
"Gallo a la plancha",
|
||||
"Milanesa de pollo"
|
||||
]
|
||||
],
|
||||
"20-07-2022": [
|
||||
[
|
||||
"Ensalada de pasta",
|
||||
"Crema de zanahoria"
|
||||
],
|
||||
[
|
||||
"Raxo de pollo",
|
||||
"Arroz con bacalao"
|
||||
]
|
||||
],
|
||||
"21-07-2022": [
|
||||
[
|
||||
"Melón con jamón",
|
||||
"Espaguetis a la arrabbiata"
|
||||
],
|
||||
[
|
||||
"Hamburguesa de ternera con patatas fritas",
|
||||
"Raya a la gallega"
|
||||
]
|
||||
],
|
||||
"26-07-2022": [
|
||||
[
|
||||
"Ensaladilla rusa",
|
||||
"Judías con chorizo",
|
||||
"Huevos rellenos"
|
||||
],
|
||||
[
|
||||
"Rabas de calamar",
|
||||
"Codillo asado"
|
||||
]
|
||||
],
|
||||
"27-07-2022": [
|
||||
[
|
||||
"Espinacas salteadas",
|
||||
"Raviolis con salsa de tomate casera"
|
||||
],
|
||||
[
|
||||
"Rosada al horno",
|
||||
"Albóndigas con arroz"
|
||||
]
|
||||
],
|
||||
"28-07-2022": [
|
||||
[
|
||||
"Champiñones con jamón",
|
||||
"Raviolis con salsa de tomate casera",
|
||||
"Melón con jamón"
|
||||
],
|
||||
[
|
||||
"Merluza a la plancha",
|
||||
"Chuletas de cerdo"
|
||||
]
|
||||
]
|
||||
}
|
1
bot/requirements.txt
Normal file
1
bot/requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
Telethon==1.38.1
|
97
bot/utils.py
Normal file
97
bot/utils.py
Normal file
|
@ -0,0 +1,97 @@
|
|||
import re
|
||||
|
||||
|
||||
class InvalidMenuMessageError(Exception):
    """
    Raised when the text of a message can not be parsed into a menu.

    Usually happens with messages that are not menus at all
    (for example "Estamos de vuelta!!!").
    """

    def __init__(self, message):
        # The offending text is embedded in the description, which is exposed
        # both as `self.message` and as the exception's only argument.
        description = f"Invalid message while parsing into a menu, message: '{message}'"
        super().__init__(description)
        self.message = description
|
||||
|
||||
|
||||
def _clean_menu_line(line: str) -> str:
    """Normalise one raw menu line; returns '' when nothing but symbols/whitespace is left."""
    # Remove leading characters that are not letters/digits (- and > are common in 2023)
    line = re.sub(r'^[^\w]+', '', line)

    # Leading and trailing whitespaces on menu items are common
    line = line.strip()

    # Replace occurrences of "c/" with "con " (an extra space is collapsed below)
    line = line.replace("c/", "con ")

    # Collapse runs of whitespace in the middle of the string
    line = ' '.join(line.split())

    # Remove a trailing dot. endswith() is safe on an empty string, unlike
    # indexing line[-1]; rstrip() drops the space left by "item ."
    if line.endswith('.'):
        line = line[:-1].rstrip()

    return line.capitalize()


def parse_menu_message(message: str) -> list[list[str]]:
    """
    Using `menu=parse_menu_message(my_message)`, `menu[0]` are the options for the first course (`menu[0]=["Pasta a la boloñesa", "Ensaladilla rusa"]` for example). `menu[1]` is the same but for the second course

    Two layouts are recognised: courses separated by a blank line, or (old
    menus) every line prefixed with `-` for one course and `>` for the other.
    An optional first line containing "menu"/"menú" and an optional last line
    containing "llevar"/"preguntar" are discarded.

    :param message: Will receive a message's content as sent in the telegram channel.

    :returns: A list of lists with the options for each course.

    :raises InvalidMenuMessageError: If the message is empty, no separation between
        courses can be found, or either course has fewer than two options.
    """

    # Collapse any run of blank lines into a single blank line
    message = re.sub(r'\n\s*\n+', '\n\n', message)
    lines = message.splitlines()

    if not lines:
        raise InvalidMenuMessageError(message)

    first_line = lines[0].lower()
    if "menu" in first_line or "menú" in first_line:
        lines.pop(0)
        # Remove the blank line after the header if it exists
        if lines and not lines[0].strip():
            lines.pop(0)

    # A message consisting only of a header has nothing left to parse
    if not lines:
        raise InvalidMenuMessageError(message)

    # Check if the course separator is a blank line (None if there is none)
    course_separator_index = next(
        (i for i, line in enumerate(lines) if not line.strip()), None)

    if course_separator_index is None:
        # Old menus use - for first course and > for second course, or the other way round.
        # No line is blank at this point, so every line has a first character.
        second_course_char = {'>': '-', '-': '>'}.get(lines[0][0])
        if second_course_char is not None:
            course_separator_index = next(
                (i for i, line in enumerate(lines)
                 if line.startswith(second_course_char)), None)

    if course_separator_index is None:
        raise InvalidMenuMessageError(message)

    # Some messages have "TAMBIEN PARA LLEVAR" at the end of them
    last_line = lines[-1].lower()
    if "llevar" in last_line or "preguntar" in last_line:
        lines.pop()

    def clean_course(raw_lines):
        # Clean every line and drop the ones that end up empty (blank
        # separator lines, or lines made only of symbols such as "---")
        cleaned = (_clean_menu_line(raw) for raw in raw_lines)
        return [item for item in cleaned if item]

    # Everything before the separator is the first course, the rest is the
    # second one. Splitting before filtering keeps the index valid.
    first_course = clean_course(lines[:course_separator_index])
    second_course = clean_course(lines[course_separator_index:])

    # Each course is expected to offer at least two options
    if len(first_course) <= 1 or len(second_course) <= 1:
        raise InvalidMenuMessageError(message)

    return [first_course, second_course]
|
Loading…
Add table
Add a link
Reference in a new issue