Skip to content

Commit

Permalink
Fix: Tofoo (#1244)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jknndy authored Oct 1, 2024
1 parent 18ed031 commit 9c5f8c6
Show file tree
Hide file tree
Showing 5 changed files with 1,134 additions and 1,355 deletions.
72 changes: 40 additions & 32 deletions recipe_scrapers/tofoo.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import re

from ._abstract import AbstractScraper
from ._exceptions import StaticValueException
from ._grouping_utils import group_ingredients
from ._utils import normalize_string
from ._utils import get_minutes


class Tofoo(AbstractScraper):
@@ -11,52 +12,59 @@ def host(cls):
return "tofoo.co.uk"

def author(self):
return "The Tofoo co."
raise StaticValueException(return_value="The Tofoo co.")

def title(self):
return self.soup.find(
"h1", {"class": "recipe-detail__title h3 blue"}
).get_text()
return self.soup.find("div", {"class": "hero__content"}).find("h1").get_text()

def category(self):
category_text = self.soup.find(
"div", {"class": "recipe-detail__ins h6"}
).get_text()
normalized_category = normalize_string(category_text)
return normalized_category
def _find_hero_stat(self, label):
hero_stats = self.soup.find("ul", {"class": "hero__stats"})
if hero_stats:
for li in hero_stats.find_all("li"):
if re.search(rf"{label}:", li.get_text()):
return li.get_text()
return None

def yields(self):
desc = self.soup.find("div", {"class": "recipe-detail__desc"}).get_text()
match = re.search(r"Serves (\d+)", desc)
if match:
return int(match.group(1))
serves_text = self._find_hero_stat("Serves")
if serves_text:
match = re.search(r"Serves:\s*(\d+)", serves_text)
if match:
return int(match.group(1))
return None

def total_time(self):
desc = self.soup.find("div", {"class": "recipe-detail__desc"}).get_text()

prep_time_match = re.search(r"Prep (\d+) min", desc)
cooking_time_match = re.search(r"Cooking (\d+) min", desc)
def prep_time(self):
prep_text = self._find_hero_stat("Prep")
return get_minutes(prep_text) if prep_text else 0

prep_time = int(prep_time_match.group(1)) if prep_time_match else 0
cooking_time = int(cooking_time_match.group(1)) if cooking_time_match else 0
def cook_time(self):
cook_text = self._find_hero_stat("Cooking")
return get_minutes(cook_text) if cook_text else 0

return prep_time + cooking_time # Return the summed time in minutes
def total_time(self):
return self.prep_time() + self.cook_time()

def ingredients(self):
ingredients_div = self.soup.find("div", {"class": "block-raw-material__body"})
ingredients = [li.get_text() for li in ingredients_div.find_all("li")]
return ingredients
ingredients_div = self.soup.find(
"div", {"class": "recipe_details__ingredients"}
)
return [li.get_text() for li in ingredients_div.find_all("li")]

def ingredient_groups(self):
return group_ingredients(
self.ingredients(),
self.soup,
".block-raw-material h5",
".block-raw-material li",
".recipe_details__ingredient h5",
".recipe_details__ingredient li",
)

def instructions(self):
instructions_div = self.soup.find("div", {"class": "sect--do-this__title"})
ol = instructions_div.find_next_sibling("ol")
instructions = [li.get_text() for li in ol.find_all("li")]
return "\n".join(instructions)
instructions_div = self.soup.find("div", {"class": "recipe_details__steps"})
ol = instructions_div.find("div", {"class": "recipe_details__steps__ol"}).find(
"ol"
)
return "\n".join([li.get_text() for li in ol.find_all("li")])

def keywords(self):
hero_cats = self.soup.find("ul", {"class": "hero__cats"})
return [li.get_text() for li in hero_cats.find_all("li")] if hero_cats else []
37 changes: 10 additions & 27 deletions tests/test_data/tofoo.co.uk/tofoo_1.json
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
"canonical_url": "https://tofoo.co.uk/recipes/banh-mi-2/",
"site_name": "Tofoo",
"host": "tofoo.co.uk",
"language": "en-US",
"language": "en-GB",
"title": "Banh Mi",
"ingredients": [
"100g Naked Tofoo, sliced",
@@ -24,39 +24,22 @@
"1/4 tsp Sriracha",
"Squeeze of lime"
],
"ingredient_groups": [
{
"ingredients": [
"100g Naked Tofoo, sliced",
"1 demi baguette",
"1/2 tsp soy sauce",
"1/2 lime, zested",
"1/2 tsp grated ginger",
"1/4 tsp garlic paste",
"10g daikon, shredded",
"10g carrot, shredded",
"10g cucumber, shredded",
"1 tbsp rice vinegar",
"oil",
"1g red chilli, sliced",
"10g radish, sliced",
"20g coriander (approx 10 sprigs)",
"Freshly ground black pepper.",
"1/4 tsp Sriracha",
"Squeeze of lime"
],
"purpose": "Get this stuff"
}
],
"instructions_list": [
"Slice the Baguette in half lengthways. Mix together the soy sauce, lime zest, ginger, and garlic in a bowl.",
"Marinade the Tofoo for 30 minutes.",
"Shred the Daikon, Carrot & Cucumber and lightly pickle in the rice vinegar for 10-20 minutes.",
"Heat a frying pan, add the oil, and lightly fry the Tofoo for a few minutes until it turns golden brown. Allow to cool.",
"Place all the fillings into the baguette, in layers, and drizzle over the Sriracha. Season with pepper and finish with a squeeze of lime."
],
"category": "Vegan",
"yields": 1,
"total_time": 35,
"image": "https://tofoo.co.uk/wp-content/uploads/2020/09/Tofoo_Banh_Mi3134_v3_WebHeader.jpg"
"cook_time": 5,
"prep_time": 30,
"image": "https://tofoo.co.uk/wp-content/uploads/2020/09/Tofoo_Banh_Mi3134_v3_WebHeader.jpg",
"keywords": [
"Naked",
"Dinner",
"Fakeaway",
"Lunch"
]
}
Loading

0 comments on commit 9c5f8c6

Please sign in to comment.