import re


# Collapses any run of blank (or whitespace-only) lines into a single blank line.
_BLANK_RUN_RE = re.compile(r'\n\s*\n+')
# Leading characters that are not word characters (bullets like "-" or ">", spaces...).
_LEADING_NON_WORD_RE = re.compile(r'^[^\w]+')


class InvalidMenuMessageError(Exception):
    """
    Custom exception raised when a menu can not be parsed correctly.

    Usually happens when trying to parse a random message
    (like "Estamos de vuelta!!!") that is not a menu at all.

    :param message: The message content that could not be parsed. It is embedded
        in the final error text, which is also stored in ``self.message``.
    """

    def __init__(self, message):
        self.message = f"Invalid message while parsing into a menu, message: '{message}'"
        super().__init__(self.message)


def _clean_menu_line(line: str) -> str:
    """Normalize a single menu item; returns ``""`` when nothing usable is left."""
    # Drop leading bullets/whitespace (- and > are common in 2023 menus).
    line = _LEADING_NON_WORD_RE.sub('', line)
    # Leading and trailing whitespace on menu items is common.
    line = line.strip()
    # "c/" is shorthand for "con"; any extra space added here is collapsed below.
    line = line.replace("c/", "con ")
    # Collapse runs of whitespace in the middle of the string.
    line = ' '.join(line.split())
    # Remove a single trailing dot. `endswith` is safe on an empty string,
    # unlike indexing with [-1].
    if line.endswith('.'):
        line = line[:-1]
    return line.capitalize()


def parse_menu_message(message: str) -> list[list[str]]:
    """
    Parse the content of a menu message into the options of each course.

    Using `menu=parse_menu_message(my_message)`, `menu[0]` are the options for
    the first course (`menu[0]=["Pasta a la boloñesa", "Ensaladilla rusa"]` for
    example). `menu[1]` is the same but for the second course.

    Two layouts are recognized:

    * courses separated by a blank line (an optional first line containing
      "menu"/"menú" is treated as a header and skipped);
    * old menus where every line starts with ``-`` for one course and ``>``
      for the other.

    A last line containing "llevar" or "preguntar" is discarded.

    :param message: Will receive a message's content as sent in the telegram channel.
    :returns: A list with two lists: the options for the first and second course.
    :raises InvalidMenuMessageError: If the message is empty, no course separator
        can be found, or either course ends up with fewer than two options.
    """
    message = _BLANK_RUN_RE.sub('\n\n', message)
    lines = message.splitlines()
    if not lines:
        raise InvalidMenuMessageError(message)

    first_line = lines[0].lower()
    if "menu" in first_line or "menú" in first_line:
        lines.pop(0)
        # Remove the blank line after the header if it exists.
        if lines and not lines[0].strip():
            lines.pop(0)

    # A message consisting only of a header is not a menu.
    if not lines:
        raise InvalidMenuMessageError(message)

    # Preferred layout: the first blank line separates the two courses.
    course_separator_index = next(
        (i for i, line in enumerate(lines) if not line.strip()),
        None,
    )

    if course_separator_index is None:
        # Old menus use - for one course and > for the other. No line is blank
        # at this point, so indexing the first character is safe.
        second_course_char = {'>': '-', '-': '>'}.get(lines[0][0])
        if second_course_char is not None:
            course_separator_index = next(
                (i for i, line in enumerate(lines) if line[0] == second_course_char),
                None,
            )

    if course_separator_index is None:
        raise InvalidMenuMessageError(message)

    # Some messages have "TAMBIEN PARA LLEVAR" at the end of them.
    last_line = lines[-1].lower()
    if "llevar" in last_line or "preguntar" in last_line:
        lines.pop()

    # Split on the raw lines first, then clean. Dropping items that clean to
    # nothing (the blank separator, punctuation-only lines) afterwards keeps
    # the separator index valid.
    first_course = [
        item
        for item in map(_clean_menu_line, lines[:course_separator_index])
        if item
    ]
    second_course = [
        item
        for item in map(_clean_menu_line, lines[course_separator_index:])
        if item
    ]

    if len(first_course) <= 1 or len(second_course) <= 1:
        raise InvalidMenuMessageError(message)

    return [first_course, second_course]