Compatible with recipe_scrapers v15 #3109

Merged · 3 commits · Apr 14, 2024
63 changes: 33 additions & 30 deletions cookbook/helper/recipe_url_import.py
@@ -15,12 +15,9 @@
 
 
 def get_from_scraper(scrape, request):
-    # converting the scrape_me object to the existing json format based on ld+json
+    # converting the scrape_html object to the existing json format based on ld+json
 
-    recipe_json = {
-        'steps': [],
-        'internal': True
-    }
+    recipe_json = {'steps': [], 'internal': True}
     keywords = []
 
     # assign source URL
@@ -157,11 +154,18 @@ def get_from_scraper(scrape, request):
     # assign steps
     try:
         for i in parse_instructions(scrape.instructions()):
-            recipe_json['steps'].append({'instruction': i, 'ingredients': [], 'show_ingredients_table': request.user.userpreference.show_step_ingredients, })
+            recipe_json['steps'].append({
+                'instruction': i,
+                'ingredients': [],
+                'show_ingredients_table': request.user.userpreference.show_step_ingredients,
+            })
     except Exception:
         pass
     if len(recipe_json['steps']) == 0:
-        recipe_json['steps'].append({'instruction': '', 'ingredients': [], })
+        recipe_json['steps'].append({
+            'instruction': '',
+            'ingredients': [],
+        })
 
     recipe_json['description'] = recipe_json['description'][:512]
     if len(recipe_json['description']) > 256:  # split at 256 as long descriptions don't look good on recipe cards
@@ -182,20 +186,20 @@ def get_from_scraper(scrape, request):
                     'original_text': x
                 }
                 if unit:
-                    ingredient['unit'] = {'name': unit, }
+                    ingredient['unit'] = {
+                        'name': unit,
+                    }
                 recipe_json['steps'][0]['ingredients'].append(ingredient)
             except Exception:
-                recipe_json['steps'][0]['ingredients'].append(
-                    {
-                        'amount': 0,
-                        'unit': None,
-                        'food': {
-                            'name': x,
-                        },
-                        'note': '',
-                        'original_text': x
-                    }
-                )
+                recipe_json['steps'][0]['ingredients'].append({
+                    'amount': 0,
+                    'unit': None,
+                    'food': {
+                        'name': x,
+                    },
+                    'note': '',
+                    'original_text': x
+                })
     except Exception:
         pass
 
@@ -248,14 +252,16 @@ def get_from_youtube_scraper(url, request):
         'working_time': 0,
         'waiting_time': 0,
         'image': "",
-        'keywords': [{'name': kw.name, 'label': kw.name, 'id': kw.pk}],
+        'keywords': [{
+            'name': kw.name,
+            'label': kw.name,
+            'id': kw.pk
+        }],
         'source_url': url,
-        'steps': [
-            {
-                'ingredients': [],
-                'instruction': ''
-            }
-        ]
+        'steps': [{
+            'ingredients': [],
+            'instruction': ''
+        }]
     }
 
     try:
@@ -452,10 +458,7 @@ def normalize_string(string):
 
 
 def iso_duration_to_minutes(string):
-    match = re.match(
-        r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?',
-        string
-    ).groupdict()
+    match = re.match(r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?', string).groupdict()
     return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)
 
 
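Note: the iso_duration_to_minutes hunk above only collapses the re.match call onto one line; the parsing logic is untouched. A standalone sketch of what the helper computes, with the pattern and arithmetic copied from the hunk and purely illustrative example values:

import re

# Pattern copied from the diff above: ISO 8601 duration components are captured,
# but only days, hours and minutes contribute to the returned minute count.
ISO_DURATION = r'P((?P<years>\d+)Y)?((?P<months>\d+)M)?((?P<weeks>\d+)W)?((?P<days>\d+)D)?T((?P<hours>\d+)H)?((?P<minutes>\d+)M)?((?P<seconds>\d+)S)?'


def iso_duration_to_minutes(string):
    match = re.match(ISO_DURATION, string).groupdict()
    return int(match['days'] or 0) * 24 * 60 + int(match['hours'] or 0) * 60 + int(match['minutes'] or 0)


print(iso_duration_to_minutes('PT1H30M'))  # 90
print(iso_duration_to_minutes('P1DT2H'))   # 1560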
2 changes: 1 addition & 1 deletion cookbook/helper/scrapers/scrapers.py
@@ -30,7 +30,7 @@ def __init__(
         html=None,
         url=None,
     ):
-        self.wild_mode = False
+        self.supported_only = False
         self.meta_http_equiv = False
         self.soup = BeautifulSoup(html, "html.parser")
         self.url = url
46 changes: 26 additions & 20 deletions cookbook/tests/other/test_automations.py
@@ -4,10 +4,10 @@
 from django.contrib import auth
 from django.test import RequestFactory
 from django_scopes import scope
+from recipe_scrapers import scrape_html
 
 from cookbook.helper.automation_helper import AutomationEngine
 from cookbook.helper.recipe_url_import import get_from_scraper
-from cookbook.helper.scrapers.scrapers import text_scraper
 from cookbook.models import Automation
 
 DATA_DIR = "cookbook/tests/other/test_data/"
@@ -73,12 +73,14 @@ def test_unit_automation(u1_s1, arg):
     assert (automation.apply_unit_automation(arg[0]) == target_name) is True
 
 
-@pytest.mark.parametrize("arg", [
-    [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
-    [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
-    [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
-    [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [[1, 'egg', 'white'], '', [1, '', 'egg', 'white']],
+        [[1, 'Egg', 'white'], '', [1, '', 'Egg', 'white']],
+        [[1, 'êgg', 'white'], '', [1, 'êgg', 'white']],
+        [[1, 'egg', 'white'], 'whole', [1, 'whole', 'egg', 'white']],
+    ]
+)
 def test_never_unit_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -97,13 +99,15 @@ def test_never_unit_automation(u1_s1, arg):
     ['.*allrecipes.*', True],
     ['.*google.*', False],
 ])
-@pytest.mark.parametrize("arg", [
-    [Automation.DESCRIPTION_REPLACE],
-    [Automation.INSTRUCTION_REPLACE],
-    [Automation.NAME_REPLACE],
-    [Automation.FOOD_REPLACE],
-    [Automation.UNIT_REPLACE],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        [Automation.DESCRIPTION_REPLACE],
+        [Automation.INSTRUCTION_REPLACE],
+        [Automation.NAME_REPLACE],
+        [Automation.FOOD_REPLACE],
+        [Automation.UNIT_REPLACE],
+    ]
+)
 def test_regex_automation(u1_s1, arg, source):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -124,11 +128,13 @@ def test_regex_automation(u1_s1, arg, source):
     assert (automation.apply_regex_replace_automation(fail, arg[0]) == target) == False
 
 
-@pytest.mark.parametrize("arg", [
-    ['second first', 'first second'],
-    ['longer string second first longer string', 'longer string first second longer string'],
-    ['second fails first', 'second fails first'],
-])
+@pytest.mark.parametrize(
+    "arg", [
+        ['second first', 'first second'],
+        ['longer string second first longer string', 'longer string first second longer string'],
+        ['second fails first', 'second fails first'],
+    ]
+)
 def test_transpose_automation(u1_s1, arg):
     user = auth.get_user(u1_s1)
     space = user.userspace_set.first().space
@@ -160,7 +166,7 @@ def test_url_import_regex_replace(u1_s1):
     else:
         test_file = os.path.join(os.getcwd(), 'cookbook', 'tests', 'other', 'test_data', recipe)
     with open(test_file, 'r', encoding='UTF-8') as d:
-        scrape = text_scraper(text=d.read(), url="https://www.allrecipes.com")
+        scrape = scrape_html(html=d.read(), org_url="https://testrecipe.test", supported_only=False)
     with scope(space=space):
         for t in types:
             Automation.objects.get_or_create(name=t, type=t, param_1='.*', param_2=find_text, param_3='', created_by=user, space=space)
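Note: the test no longer goes through the project's text_scraper wrapper; it feeds the fixture HTML straight to recipe-scrapers v15. A hedged sketch of that call outside the test suite (the fixture path is illustrative, and the file is assumed to contain schema.org Recipe markup):

from recipe_scrapers import scrape_html

# Illustrative fixture path, not taken from the diff; any saved recipe page with ld+json works.
with open('cookbook/tests/other/test_data/recipe.html', encoding='UTF-8') as f:
    scrape = scrape_html(html=f.read(), org_url='https://testrecipe.test', supported_only=False)

print(scrape.title())
print(scrape.ingredients())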
6 changes: 3 additions & 3 deletions cookbook/views/api.py
@@ -35,7 +35,7 @@
 from django_scopes import scopes_disabled
 from icalendar import Calendar, Event
 from oauth2_provider.models import AccessToken
-from recipe_scrapers import scrape_me
+from recipe_scrapers import scrape_html
 from recipe_scrapers._exceptions import NoSchemaFoundInWildMode
 from requests.exceptions import MissingSchema
 from rest_framework import decorators, status, viewsets
@@ -1437,8 +1437,8 @@ def post(self, request, *args, **kwargs):
         else:
             try:
                 if validators.url(url, public=True):
-                    scrape = scrape_me(url_path=url, wild_mode=True)
-
+                    html = requests.get(url).content
+                    scrape = scrape_html(org_url=url, html=html, supported_only=False)
                 else:
                     return Response({'error': True, 'msg': _('Invalid Url')}, status=status.HTTP_400_BAD_REQUEST)
             except NoSchemaFoundInWildMode:
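Note: with recipe-scrapers 15 the old scrape_me(url_path=url, wild_mode=True) call is replaced; the view now downloads the page itself and hands the HTML to scrape_html with the supported-site check disabled. A minimal sketch of that flow, assuming requests is available and the placeholder URL returns a page with recipe markup:

import requests
from recipe_scrapers import scrape_html

url = 'https://www.example.com/some-recipe'  # placeholder URL, not from the diff
html = requests.get(url).content
scrape = scrape_html(org_url=url, html=html, supported_only=False)
print(scrape.title())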
2 changes: 1 addition & 1 deletion requirements.txt
@@ -30,7 +30,7 @@ Jinja2==3.1.3
 django-webpack-loader==3.0.1
 git+https://github.com/BITSOLVER/django-js-reverse@071e304fd600107bc64bbde6f2491f1fe049ec82
 django-allauth==0.61.1
-recipe-scrapers==14.53.0
+recipe-scrapers==15.0.0-rc2
 django-scopes==2.0.0
 django-treebeard==4.7
 django-cors-headers==4.3.1