Skip to content

Commit

Permalink
Merge pull request #3280 from vitoller/feature/import-gourmet
Browse files Browse the repository at this point in the history
import of gourmet files
  • Loading branch information
vabene1111 authored Nov 11, 2024
2 parents 1cc5a0a + 15abe9f commit faa3c99
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 62 deletions.
3 changes: 2 additions & 1 deletion cookbook/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,13 @@ class ImportExportBase(forms.Form):
COOKMATE = 'COOKMATE'
REZEPTSUITEDE = 'REZEPTSUITEDE'
PDF = 'PDF'
GOURMET = 'GOURMET'

type = forms.ChoiceField(choices=((DEFAULT, _('Default')), (PAPRIKA, 'Paprika'), (NEXTCLOUD, 'Nextcloud Cookbook'), (MEALIE, 'Mealie'), (CHOWDOWN, 'Chowdown'),
(SAFFRON, 'Saffron'), (CHEFTAP, 'ChefTap'), (PEPPERPLATE, 'Pepperplate'), (RECETTETEK, 'RecetteTek'), (RECIPESAGE, 'Recipe Sage'),
(DOMESTICA, 'Domestica'), (MEALMASTER, 'MealMaster'), (REZKONV, 'RezKonv'), (OPENEATS, 'Openeats'), (RECIPEKEEPER, 'Recipe Keeper'),
(PLANTOEAT, 'Plantoeat'), (COOKBOOKAPP, 'CookBookApp'), (COPYMETHAT, 'CopyMeThat'), (PDF, 'PDF'), (MELARECIPES, 'Melarecipes'),
(COOKMATE, 'Cookmate'), (REZEPTSUITEDE, 'Recipesuite.de')))
(COOKMATE, 'Cookmate'), (REZEPTSUITEDE, 'Recipesuite.de'), (GOURMET, 'Gourmet')))


class MultipleFileInput(forms.ClearableFileInput):
Expand Down
211 changes: 211 additions & 0 deletions cookbook/integration/gourmet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
import base64
from io import BytesIO
from lxml import etree
import requests
from pathlib import Path

from bs4 import BeautifulSoup, Tag

from cookbook.helper.HelperFunctions import validate_import_url
from cookbook.helper.ingredient_parser import IngredientParser
from cookbook.helper.recipe_url_import import parse_servings, parse_servings_text, parse_time, iso_duration_to_minutes
from cookbook.integration.integration import Integration
from cookbook.models import Ingredient, Recipe, Step, Keyword
from recipe_scrapers import scrape_html


class Gourmet(Integration):
    """Importer for recipes exported as HTML by the Gourmet recipe manager.

    Each exported ``.htm`` file is split into its ``<div class="recipe">``
    elements by :meth:`split_recipe_file`; every such element is then turned
    into a ``Recipe`` by :meth:`get_recipe_from_file`. Export is not supported.
    """

    # Lower-case unit words recognised in "<number> <unit>" time strings.
    _HOUR_UNITS = ('stunde', 'stunden', 'hour', 'hours')
    _DAY_UNITS = ('tag', 'tage', 'day', 'days')

    def split_recipe_file(self, file):
        """Return all ``<div class="recipe">`` elements of one exported HTML file.

        :param file: binary file-like object containing the HTML document
        :return: the BeautifulSoup result set of matching tags
        """
        encoding = 'utf-8'
        byte_string = file.read()
        # bytes that cannot be decoded are dropped instead of aborting the import
        text_obj = byte_string.decode(encoding, errors="ignore")
        soup = BeautifulSoup(text_obj, "html.parser")
        return soup.find_all("div", {"class": "recipe"})

    def get_ingredients_recursive(self, step, ingredients, ingredient_parser):
        """Attach the ingredients found below ``ingredients`` to ``step``.

        ``<li>`` children are stored as header rows and their nested
        ``<ul class="ing">`` list is processed recursively; every other child
        tag is handed to the ingredient parser as a single ingredient line.

        :param step: Step that receives the created Ingredient objects
        :param ingredients: list tag to walk; anything that is not a Tag
            (e.g. ``None`` when no list was found) is ignored
        :param ingredient_parser: IngredientParser used for parsing and for
            resolving food and unit objects
        """
        if not isinstance(ingredients, Tag):
            return

        for ingredient in ingredients.children:
            if not isinstance(ingredient, Tag):
                continue

            if ingredient.name == "li":
                # only the text directly inside the <li> is the group title,
                # text of the nested list must not be part of it
                step_name = "".join(ingredient.find_all(string=True, recursive=False)).strip().rstrip(":")

                step.ingredients.add(Ingredient.objects.create(
                    is_header=True,
                    note=step_name[:256],
                    original_text=step_name,
                    space=self.request.space,
                ))
                next_ingredients = ingredient.find("ul", {"class": "ing"})
                self.get_ingredients_recursive(step, next_ingredients, ingredient_parser)
            else:
                original_text = ingredient.text.strip()
                try:
                    amount, unit, food, note = ingredient_parser.parse(original_text)
                    f = ingredient_parser.get_food(food)
                    u = ingredient_parser.get_unit(unit)
                    step.ingredients.add(
                        Ingredient.objects.create(
                            food=f,
                            unit=u,
                            amount=amount,
                            note=note,
                            original_text=original_text,
                            space=self.request.space,
                        )
                    )
                except ValueError:
                    # best effort: a line that cannot be parsed is skipped
                    # instead of failing the whole recipe
                    pass

    @staticmethod
    def _duration_to_minutes(text):
        """Convert a ``"<number> <unit>"`` duration string to whole minutes.

        A decimal comma is accepted (``"1,5 Stunden"`` gives 90). Hour and day
        unit words are converted; any other or missing unit is treated as
        minutes.

        :raises ValueError: if the text is empty or the number is not numeric
        """
        parts = text.strip().split()
        if not parts:
            raise ValueError('empty duration')
        value = float(parts[0].replace(',', '.'))
        unit = parts[1].lower() if len(parts) > 1 else ''
        if unit in Gourmet._HOUR_UNITS:
            return int(value * 60)
        if unit in Gourmet._DAY_UNITS:
            return int(value * 60 * 24)
        return int(value)

    def _get_time_minutes(self, file, itemprop):
        """Return the minutes stored in ``<span itemprop=...>`` or ``None``.

        ``None`` is returned both when the span is missing and when its text
        cannot be interpreted, so a malformed time never aborts the import.
        """
        tag = file.find("span", {"itemprop": itemprop})
        if tag is None:
            return None
        try:
            return self._duration_to_minutes(tag.text)
        except ValueError:
            return None

    @staticmethod
    def _text_with_linebreaks(node):
        """Return the stripped text of ``node`` with ``<br>`` turned into newlines.

        Bare text nodes are not Tags and cannot be searched for ``<br>``;
        their own string content is returned instead.
        """
        if isinstance(node, Tag):
            for br in node.find_all("br"):
                br.replace_with("\n")
            return node.text.strip()
        return str(node).strip()

    def get_recipe_from_file(self, file):
        """Create and return a Recipe from one ``<div class="recipe">`` element.

        Reads name, source link, categories/cuisines (as keywords), yield,
        times, ingredients, instructions, notes, description and the image
        referenced by the first ``<img>`` (looked up in ``self.import_zip``).

        :param file: BeautifulSoup tag of a single recipe
        :return: the saved Recipe
        """
        # 'file' comes in as a BeautifulSoup object

        # the first link that actually carries a href is used as source
        link = file.find('a', href=True)
        source_url = link.get("href") if link is not None else None

        name = file.find("p", {"class": "title"}).find("span", {"itemprop": "name"}).text.strip()

        recipe = Recipe.objects.create(
            name=name[:128],
            source_url=source_url,
            created_by=self.request.user,
            internal=True,
            space=self.request.space,
        )

        # categories and cuisines both become keywords of the recipe
        for itemprop in ("recipeCategory", "recipeCuisine"):
            for tag in file.find_all("span", {"itemprop": itemprop}):
                keyword, _created = Keyword.objects.get_or_create(name=tag.text, space=self.request.space)
                recipe.keywords.add(keyword)

        try:
            recipe.servings = parse_servings(file.find("span", {"itemprop": "recipeYield"}).text.strip())
        except AttributeError:
            pass

        # NOTE(review): prepTime is stored as waiting_time and cookTime as
        # working_time, exactly as before - confirm this mapping is intended.
        prep_time_min = self._get_time_minutes(file, "prepTime")
        if prep_time_min is not None:
            recipe.waiting_time = prep_time_min

        cook_time_min = self._get_time_minutes(file, "cookTime")
        if cook_time_min is not None:
            recipe.working_time = cook_time_min

        step = Step.objects.create(
            instruction='',
            space=self.request.space,
            show_ingredients_table=self.request.user.userpreference.show_step_ingredients,
        )

        ingredient_parser = IngredientParser(self.request, True)

        # all ingredients are attached to the first step
        ingredients = file.find("ul", {"class": "ing"})
        self.get_ingredients_recursive(step, ingredients, ingredient_parser)

        instructions = file.find("div", {"class": "instructions"})
        if isinstance(instructions, Tag):
            for instruction in instructions.children:
                if not isinstance(instruction, Tag) or instruction.text == "":
                    continue
                if instruction.name == "h3":
                    # a heading starts a new step, unless the current step is
                    # still empty and can simply take the heading as its name
                    if step.instruction:
                        step.save()
                        recipe.steps.add(step)
                        step = Step.objects.create(
                            instruction='',
                            space=self.request.space,
                        )

                    step.name = instruction.text.strip()[:128]
                elif instruction.name == "div":
                    for instruction_step in instruction.children:
                        text = self._text_with_linebreaks(instruction_step)
                        if text:
                            step.instruction += text + ' \n\n'

        # notes are appended in italics to the last step
        notes = file.find("div", {"class": "modifications"})
        if notes:
            for n in notes.children:
                if isinstance(n, Tag) and n.name == "h3":
                    heading = n.text.strip()
                    if heading:
                        step.instruction += f'*{heading}:* \n\n'
                    continue
                text = self._text_with_linebreaks(n)
                if text:
                    step.instruction += '*' + text + '* \n\n'

        description_tag = file.find("div", {"id": "description"})
        description = description_tag.text.strip() if description_tag is not None else ''
        if len(description) <= 512:
            recipe.description = description
        else:
            # too long for the description field: keep a teaser there and
            # put the full text into the instructions
            recipe.description = description[:480] + ' ... (full description below)'
            step.instruction += '*Description:* \n\n*' + description + '* \n\n'

        step.save()
        recipe.steps.add(step)

        # import the primary recipe image that is stored in the zip
        try:
            image = file.find("img")
            image_path = image.get("src") if image is not None else None
            if image_path:
                # the src may use Windows or POSIX separators, only the file name is compared
                image_filename = image_path.replace("\\", "/").rsplit("/", 1)[-1]

                for f in self.import_zip.filelist:
                    if Path(f.filename).name == image_filename:
                        image_bytes = BytesIO(self.import_zip.read(f))
                        self.import_recipe_image(recipe, image_bytes, filetype='.jpeg')
                        break
        except Exception as e:
            # a broken image must never prevent the recipe itself from being imported
            print(recipe.name, ': failed to import image ', str(e))

        recipe.save()
        return recipe

    def get_files_from_recipes(self, recipes, el, cookie):
        """Export is not supported for this integration."""
        raise NotImplementedError('Method not implemented in storage integration')

    def get_file_from_recipe(self, recipe):
        """Export is not supported for this integration."""
        raise NotImplementedError('Method not implemented in storage integration')
13 changes: 13 additions & 0 deletions cookbook/integration/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,19 @@ def do_import(self, files, il, import_duplicates):
il.total_recipes = len(new_file_list)
file_list = new_file_list

if isinstance(self, cookbook.integration.gourmet.Gourmet):
    # the Gourmet importer reads self.import_zip to look up recipe images
    self.import_zip = import_zip
    new_file_list = []
    for file in file_list:
        # Skip empty entries and the index page. The previous bare `next`
        # was a no-op expression, so neither entry was actually skipped.
        if file.file_size == 0:
            continue
        if file.filename.startswith("index.htm"):
            continue
        if file.filename.endswith(".htm"):
            # one .htm file may hold several recipes; each becomes its own import item
            new_file_list += self.split_recipe_file(BytesIO(import_zip.read(file.filename)))
    il.total_recipes = len(new_file_list)
    file_list = new_file_list

for z in file_list:
try:
if not hasattr(z, 'filename') or isinstance(z, Tag):
Expand Down
3 changes: 3 additions & 0 deletions cookbook/views/import_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from cookbook.integration.rezeptsuitede import Rezeptsuitede
from cookbook.integration.rezkonv import RezKonv
from cookbook.integration.saffron import Saffron
from cookbook.integration.gourmet import Gourmet
from cookbook.models import ExportLog, Recipe
from recipes import settings

Expand Down Expand Up @@ -80,6 +81,8 @@ def get_integration(request, export_type):
return Cookmate(request, export_type)
if export_type == ImportExportBase.REZEPTSUITEDE:
return Rezeptsuitede(request, export_type)
if export_type == ImportExportBase.GOURMET:
return Gourmet(request, export_type)


@group_required('user')
Expand Down
Loading

0 comments on commit faa3c99

Please sign in to comment.