-
Notifications
You must be signed in to change notification settings - Fork 562
/
Copy path__init__.py
761 lines (735 loc) · 28.6 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
from __future__ import annotations
import contextlib
import warnings
from typing import Any
from ._abstract import AbstractScraper
from ._exceptions import NoSchemaFoundInWildMode, WebsiteNotImplementedError
from ._factory import SchemaScraperFactory
from ._utils import get_host_name
from .aberlehome import AberleHome
from .abril import Abril
from .abuelascounter import AbuelasCounter
from .acouplecooks import ACoupleCooks
from .addapinch import AddAPinch
from .afghankitchenrecipes import AfghanKitchenRecipes
from .akispetretzikis import AkisPetretzikis
from .albertheijn import AlbertHeijn
from .allrecipes import AllRecipes
from .alltomat import AllTomat
from .altonbrown import AltonBrown
from .amazingribs import AmazingRibs
from .ambitiouskitchen import AmbitiousKitchen
from .archanaskitchen import ArchanasKitchen
from .argiro import Argiro
from .arla import Arla
from .atelierdeschefs import AtelierDesChefs
from .averiecooks import AverieCooks
from .bakingmischief import BakingMischief
from .bakingsense import BakingSense
from .barefootcontessa import BareFootContessa
from .bbcfood import BBCFood
from .bbcgoodfood import BBCGoodFood
from .bestrecipes import BestRecipes
from .bettybossi import BettyBossi
from .bettycrocker import BettyCrocker
from .biancazapatka import BiancaZapatka
from .bigoven import BigOven
from .blueapron import BlueApron
from .bluejeanchef import BlueJeanChef
from .bodybuilding import Bodybuilding
from .bonappetit import BonAppetit
from .bongeats import BongEats
from .bowlofdelicious import BowlOfDelicious
from .breadtopia import Breadtopia
from .briceletbaklava import BricelEtBaklava
from .budgetbytes import BudgetBytes
from .carlsbadcravings import CarlsBadCravings
from .castironketo import CastIronKeto
from .cdkitchen import CdKitchen
from .chefkoch import Chefkoch
from .chefnini import Chefnini
from .chefsavvy import ChefSavvy
from .closetcooking import ClosetCooking
from .comidinhasdochef import ComidinhasDoChef
from .cookeatshare import CookEatShare
from .cookieandkate import CookieAndKate
from .cookingcircle import CookingCircle
from .cookinglight import CookingLight
from .cookpad import CookPad
from .cooktalk import CookTalk
from .coopse import CoopSE
from .copykat import CopyKat
from .costco import Costco
from .countryliving import CountryLiving
from .cucchiaio import Cucchiaio
from .cuisineaz import CuisineAZ
from .cybercook import Cybercook
from .davidlebovitz import DavidLebovitz
from .delish import Delish
from .dishnz import Dishnz
from .domesticateme import DomesticateMe
from .downshiftology import Downshiftology
from .dr import Dr
from .eatingbirdfood import EatingBirdFood
from .eatingwell import EatingWell
from .eatliverun import EatLiveRun
from .eatsmarter import Eatsmarter
from .eattolerant import EatTolerant
from .eatwell101 import EatWell101
from .eatwhattonight import EatWhatTonight
from .elavegan import ElaVegan
from .emmikochteinfach import EmmiKochtEinfach
from .epicurious import Epicurious
from .errenskitchen import ErrensKitchen
from .ethanchlebowski import EthanChlebowski
from .farmhousedelivery import FarmhouseDelivery
from .farmhouseonboone import FarmhouseOnBoone
from .fattoincasadabenedetta import FattoInCasaDaBenedetta
from .fifteenspatulas import FifteenSpatulas
from .finedininglovers import FineDiningLovers
from .fitmencook import FitMenCook
from .fitslowcookerqueen import FitSlowCookerQueen
from .food import Food
from .food52 import Food52
from .foodandwine import FoodAndWine
from .foodfidelity import FoodFidelity
from .foodnetwork import FoodNetwork
from .foodrepublic import FoodRepublic
from .forksoverknives import ForksOverKnives
from .forktospoon import ForkToSpoon
from .franzoesischkochen import FranzoesischKochen
from .g750g import G750g
from .gesundaktiv import GesundAktiv
from .giallozafferano import GialloZafferano
from .gimmesomeoven import GimmeSomeOven
from .globo import Globo
from .godt import Godt
from .gonnawantseconds import GonnaWantSeconds
from .goodfooddiscoveries import GoodFoodDiscoveries
from .goodhousekeeping import GoodHousekeeping
from .goustojson import GoustoJson
from .grandfrais import GrandFrais
from .greatbritishchefs import GreatBritishChefs
from .grimgrains import GrimGrains
from .grouprecipes import GroupRecipes
from .halfbakedharvest import HalfBakedHarvest
from .handletheheat import HandleTheHeat
from .hassanchef import HassanChef
from .headbangerskitchen import HeadbangersKitchen
from .heatherchristo import HeatherChristo
from .heb import HEB
from .hellofresh import HelloFresh
from .herseyland import HerseyLand
from .homechef import HomeChef
from .hostthetoast import Hostthetoast
from .ica import Ica
from .ig import IG
from .imworthy import ImWorthy
from .inbloombakery import InBloomBakery
from .indianhealthyrecipes import IndianHealthyRecipes
from .innit import Innit
from .insanelygoodrecipes import InsanelyGoodRecipes
from .inspiralized import Inspiralized
from .izzycooking import IzzyCooking
from .jamieoliver import JamieOliver
from .jimcooksfoodgood import JimCooksFoodGood
from .joshuaweissman import JoshuaWeissman
from .joyfoodsunshine import Joyfoodsunshine
from .joythebaker import JoyTheBaker
from .juliegoodwin import JulieGoodwin
from .justataste import JustATaste
from .justbento import JustBento
from .justonecookbook import JustOneCookbook
from .kennymcgovern import KennyMcGovern
from .keukenliefdenl import KeukenLiefdeNL
from .kingarthur import KingArthur
from .kitchenaidaustralia import KitchenAidAustralia
from .kitchenstories import KitchenStories
from .kochbar import Kochbar
from .kochbucher import Kochbucher
from .koket import Koket
from .kptncook import KptnCook
from .kuchniadomowa import KuchniaDomowa
from .kuchynalidla import KuchynaLidla
from .kwestiasmaku import KwestiaSmaku
from .latelierderoxane import LAtelierDeRoxane
from .leanandgreenrecipes import LeanAndGreenRecipes
from .lecker import Lecker
from .lecremedelacrumb import LeCremeDeLaCrumb
from .lekkerensimpel import LekkerEnSimpel
from .leukerecepten import Leukerecepten
from .lifestyleofafoodie import LifestyleOfAFoodie
from .littlespicejar import LittleSpiceJar
from .livelytable import LivelyTable
from .lovingitvegan import Lovingitvegan
from .maangchi import Maangchi
from .madensverden import MadensVerden
from .madewithlau import MadeWithLau
from .madsvin import Madsvin
from .marleyspoon import MarleySpoon
from .marmiton import Marmiton
from .marthastewart import MarthaStewart
from .matprat import Matprat
from .mccormick import McCormick
from .meljoulwan import Meljoulwan
from .melskitchencafe import MelsKitchenCafe
from .mindmegette import Mindmegette
from .minimalistbaker import Minimalistbaker
from .ministryofcurry import MinistryOfCurry
from .misya import Misya
from .mob import Mob
from .momswithcrockpots import MomsWithCrockPots
from .monsieurcuisine import MonsieurCuisine
from .motherthyme import MotherThyme
from .moulinex import Moulinex
from .mundodereceitasbimby import MundoDeReceitasBimby
from .mybakingaddiction import MyBakingAddiction
from .myjewishlearning import MyJewishLearning
from .mykitchen101 import MyKitchen101
from .mykitchen101en import MyKitchen101en
from .myrecipes import MyRecipes
from .nhshealthierfamilies import NHSHealthierFamilies
from .nibbledish import NibbleDish
from .nihhealthyeating import NIHHealthyEating
from .norecipes import NoRecipes
from .nourishedbynutrition import NourishedByNutrition
from .nrkmat import NRKMat
from .number2pencil import Number2Pencil
from .nutritionbynathalie import NutritionByNathalie
from .nutritionfacts import NutritionFacts
from .nytimes import NYTimes
from .ohsheglows import OhSheGlows
from .omnivorescookbook import OmnivoresCookbook
from .onceuponachef import OnceUponAChef
from .onehundredonecookbooks import OneHundredOneCookBooks
from .owenhan import OwenHan
from .paleorunningmomma import PaleoRunningMomma
from .panelinha import Panelinha
from .paninihappy import PaniniHappy
from .persnicketyplates import PersnicketyPlates
from .pickuplimes import PickUpLimes
from .pinchofyum import PinchOfYum
from .pingodoce import PingoDoce
from .pinkowlkitchen import PinkOwlKitchen
from .platingpixels import PlatingPixels
from .plowingthroughlife import PlowingThroughLife
from .popsugar import PopSugar
from .practicalselfreliance import PracticalSelfReliance
from .pressureluckcooking import PressureLuckCooking
from .primaledgehealth import PrimalEdgeHealth
from .projectgezond import ProjectGezond
from .przepisy import Przepisy
from .purelypope import PurelyPope
from .purplecarrot import PurpleCarrot
from .rachlmansfield import RachlMansfield
from .rainbowplantlife import RainbowPlantLife
from .realfoodtesco import RealFoodTesco
from .realsimple import RealSimple
from .receitasnestlebr import ReceitasNestleBR
from .recept import Recept
from .reciperunner import RecipeRunner
from .recipetineats import RecipeTinEats
from .redhousespice import RedHouseSpice
from .reishunger import Reishunger
from .rezeptwelt import Rezeptwelt
from .ricetta import Ricetta
from .ricetteperbimby import RicettePerBimby
from .rosannapansino import RosannaPansino
from .rutgerbakt import RutgerBakt
from .saboresanjinomoto import SaboresAnjinomoto
from .sallysbakingaddiction import SallysBakingAddiction
from .sallysblog import SallysBlog
from .saltpepperskillet import SaltPepperSkillet
from .saveur import Saveur
from .seriouseats import SeriousEats
from .simpleveganista import SimpleVeganista
from .simplycookit import SimplyCookit
from .simplyquinoa import SimplyQuinoa
from .simplyrecipes import SimplyRecipes
from .simplywhisked import SimplyWhisked
from .skinnytaste import SkinnyTaste
from .smulweb import Smulweb
from .sobors import SoBors
from .southerncastiron import SouthernCastIron
from .southernliving import SouthernLiving
from .spendwithpennies import SpendWithPennies
from .springlane import Springlane
from .staysnatched import StaySnatched
from .steamykitchen import SteamyKitchen
from .streetkitchen import StreetKitchen
from .strongrfastr import StrongrFastr
from .sunbasket import SunBasket
from .sundpaabudget import SundPaaBudget
from .sunset import Sunset
from .sweetcsdesigns import SweetCsDesigns
from .sweetpeasandsaffron import SweetPeasAndSaffron
from .tasteatlas import TasteAtlas
from .tasteau import TasteAU
from .tasteofhome import TasteOfHome
from .tastesbetterfromscratch import TastesBetterFromScratch
from .tastesoflizzyt import TastesOfLizzyT
from .tasty import Tasty
from .tastykitchen import TastyKitchen
from .theclevercarrot import TheCleverCarrot
from .theexpertguides import TheExpertGuides
from .thehappyfoodie import TheHappyFoodie
from .thekitchencommunity import TheKitchenCommunity
from .thekitchenmagpie import TheKitchenMagPie
from .thekitchn import TheKitchn
from .themagicalslowcooker import TheMagicalSlowCooker
from .themodernproper import TheModernProper
from .thepalatablelife import ThePalatableLife
from .thepioneerwoman import ThePioneerWoman
from .therecipecritic import Therecipecritic
from .thespruceeats import TheSpruceEats
from .thevintagemixer import TheVintageMixer
from .thewoksoflife import Thewoksoflife
from .thinlicious import Thinlicious
from .tidymom import TidyMom
from .timesofindia import TimesOfIndia
from .tineno import TineNo
from .tofoo import Tofoo
from .tudogostoso import TudoGostoso
from .twopeasandtheirpod import TwoPeasAndTheirPod
from .uitpaulineskeukennl import UitPaulinesKeukenNL
from .usapears import USAPears
from .usdamyplate import USDAMyPlate
from .valdemarsro import Valdemarsro
from .vanillaandbean import VanillaAndBean
from .vegetarbloggen import Vegetarbloggen
from .vegolosi import Vegolosi
from .vegrecipesofindia import VegRecipesOfIndia
from .waitrose import Waitrose
from .watchwhatueat import WatchWhatUEat
from .wearenotmartha import WeAreNotMartha
from .weightwatchers import WeightWatchers
from .weightwatcherspublic import WeightWatchersPublic
from .wellplated import WellPlated
from .whatsgabycooking import WhatsGabyCooking
from .whole30 import Whole30
from .wholefoods import WholeFoods
from .wikicookbook import WikiCookbook
from .williamssonoma import WilliamsSonoma
from .womensweeklyfood import WomensWeeklyFood
from .woolworths import Woolworths
from .woop import Woop
from .yemek import Yemek
from .yummly import Yummly
from .zaubertopf import ZauberTopf
from .zeitwochenmarkt import ZeitWochenmarkt
from .zenbelly import ZenBelly
# Registry of supported sites: each key is the value returned by a scraper
# class's ``host()`` call and each value is the scraper class registered for
# it. ``scrape_me`` and ``scrape_html`` look entries up by the host name that
# ``get_host_name`` extracts from a URL. A class appears several times when it
# is registered under more than one domain or subdomain.
SCRAPERS = {
    ACoupleCooks.host(): ACoupleCooks,
    AberleHome.host(): AberleHome,
    Abril.host(): Abril,
    AbuelasCounter.host(): AbuelasCounter,
    AddAPinch.host(): AddAPinch,
    AfghanKitchenRecipes.host(): AfghanKitchenRecipes,
    AkisPetretzikis.host(): AkisPetretzikis,
    AlbertHeijn.host(): AlbertHeijn,
    AllRecipes.host(): AllRecipes,
    AllTomat.host(): AllTomat,
    AltonBrown.host(): AltonBrown,
    AmazingRibs.host(): AmazingRibs,
    AmbitiousKitchen.host(): AmbitiousKitchen,
    ArchanasKitchen.host(): ArchanasKitchen,
    Argiro.host(): Argiro,
    Arla.host(): Arla,
    AtelierDesChefs.host(): AtelierDesChefs,
    AverieCooks.host(): AverieCooks,
    BBCFood.host(): BBCFood,
    BBCFood.host(domain="co.uk"): BBCFood,
    BBCGoodFood.host(): BBCGoodFood,
    BakingSense.host(): BakingSense,
    BakingMischief.host(): BakingMischief,
    BareFootContessa.host(): BareFootContessa,
    BestRecipes.host(): BestRecipes,
    BettyBossi.host(): BettyBossi,
    BettyCrocker.host(): BettyCrocker,
    BiancaZapatka.host(): BiancaZapatka,
    BigOven.host(): BigOven,
    BlueApron.host(): BlueApron,
    BlueJeanChef.host(): BlueJeanChef,
    Bodybuilding.host(): Bodybuilding,
    BonAppetit.host(): BonAppetit,
    BowlOfDelicious.host(): BowlOfDelicious,
    BongEats.host(): BongEats,
    Breadtopia.host(): Breadtopia,
    BricelEtBaklava.host(): BricelEtBaklava,
    BudgetBytes.host(): BudgetBytes,
    CarlsBadCravings.host(): CarlsBadCravings,
    CastIronKeto.host(): CastIronKeto,
    CdKitchen.host(): CdKitchen,
    ChefSavvy.host(): ChefSavvy,
    Chefkoch.host(): Chefkoch,
    Chefnini.host(): Chefnini,
    ClosetCooking.host(): ClosetCooking,
    ComidinhasDoChef.host(): ComidinhasDoChef,
    CookEatShare.host(): CookEatShare,
    CookPad.host(): CookPad,
    CookTalk.host(): CookTalk,
    CookieAndKate.host(): CookieAndKate,
    CookingCircle.host(): CookingCircle,
    CookingLight.host(): CookingLight,
    CoopSE.host(): CoopSE,
    CopyKat.host(): CopyKat,
    Costco.host(): Costco,
    CountryLiving.host(): CountryLiving,
    Cucchiaio.host(): Cucchiaio,
    CuisineAZ.host(): CuisineAZ,
    Cybercook.host(): Cybercook,
    DavidLebovitz.host(): DavidLebovitz,
    Delish.host(): Delish,
    Dishnz.host(): Dishnz,
    EatLiveRun.host(): EatLiveRun,
    ElaVegan.host(): ElaVegan,
    FitSlowCookerQueen.host(): FitSlowCookerQueen,
    GrandFrais.host(): GrandFrais,
    HeatherChristo.host(): HeatherChristo,
    InBloomBakery.host(): InBloomBakery,
    JoshuaWeissman.host(): JoshuaWeissman,
    JoyTheBaker.host(): JoyTheBaker,
    KitchenAidAustralia.host(): KitchenAidAustralia,
    KuchynaLidla.host(): KuchynaLidla,
    McCormick.host(): McCormick,
    Moulinex.host(): Moulinex,
    MundoDeReceitasBimby.host(): MundoDeReceitasBimby,
    MyJewishLearning.host(): MyJewishLearning,
    NutritionFacts.host(): NutritionFacts,
    PinchOfYum.host(): PinchOfYum,
    Recept.host(): Recept,
    RicettePerBimby.host(): RicettePerBimby,
    StrongrFastr.host(): StrongrFastr,
    TasteAtlas.host(): TasteAtlas,
    ThePalatableLife.host(): ThePalatableLife,
    Thinlicious.host(): Thinlicious,
    DomesticateMe.host(): DomesticateMe,
    Downshiftology.host(): Downshiftology,
    Dr.host(): Dr,
    EatWell101.host(): EatWell101,
    EatWhatTonight.host(): EatWhatTonight,
    EatingBirdFood.host(): EatingBirdFood,
    EatingWell.host(): EatingWell,
    Eatsmarter.host(): Eatsmarter,
    Eatsmarter.host(domain="de"): Eatsmarter,
    EatTolerant.host(): EatTolerant,
    EmmiKochtEinfach.host(): EmmiKochtEinfach,
    Epicurious.host(): Epicurious,
    ErrensKitchen.host(): ErrensKitchen,
    EthanChlebowski.host(): EthanChlebowski,
    FarmhouseDelivery.host(): FarmhouseDelivery,
    FarmhouseOnBoone.host(): FarmhouseOnBoone,
    FattoInCasaDaBenedetta.host(): FattoInCasaDaBenedetta,
    FifteenSpatulas.host(): FifteenSpatulas,
    FineDiningLovers.host(): FineDiningLovers,
    FitMenCook.host(): FitMenCook,
    Food.host(): Food,
    Food52.host(): Food52,
    FoodAndWine.host(): FoodAndWine,
    FoodFidelity.host(): FoodFidelity,
    FoodNetwork.host(): FoodNetwork,
    FoodNetwork.host(domain="com"): FoodNetwork,
    FoodRepublic.host(): FoodRepublic,
    ForkToSpoon.host(): ForkToSpoon,
    ForksOverKnives.host(): ForksOverKnives,
    FranzoesischKochen.host(): FranzoesischKochen,
    G750g.host(): G750g,
    GialloZafferano.host(): GialloZafferano,
    GimmeSomeOven.host(): GimmeSomeOven,
    Globo.host(): Globo,
    Godt.host(): Godt,
    GonnaWantSeconds.host(): GonnaWantSeconds,
    GoodFoodDiscoveries.host(): GoodFoodDiscoveries,
    GoodHousekeeping.host(): GoodHousekeeping,
    GoustoJson.host(): GoustoJson,
    GreatBritishChefs.host(): GreatBritishChefs,
    GrimGrains.host(): GrimGrains,
    GroupRecipes.host(): GroupRecipes,
    HEB.host(): HEB,
    HalfBakedHarvest.host(): HalfBakedHarvest,
    HandleTheHeat.host(): HandleTheHeat,
    HassanChef.host(): HassanChef,
    HeadbangersKitchen.host(): HeadbangersKitchen,
    HelloFresh.host(): HelloFresh,
    HelloFresh.host(domain="at"): HelloFresh,
    HelloFresh.host(domain="be"): HelloFresh,
    HelloFresh.host(domain="ca"): HelloFresh,
    HelloFresh.host(domain="ch"): HelloFresh,
    HelloFresh.host(domain="co.nz"): HelloFresh,
    HelloFresh.host(domain="co.uk"): HelloFresh,
    HelloFresh.host(domain="com.au"): HelloFresh,
    HelloFresh.host(domain="de"): HelloFresh,
    HelloFresh.host(domain="dk"): HelloFresh,
    HelloFresh.host(domain="es"): HelloFresh,
    HelloFresh.host(domain="fr"): HelloFresh,
    HelloFresh.host(domain="ie"): HelloFresh,
    HelloFresh.host(domain="it"): HelloFresh,
    HelloFresh.host(domain="lu"): HelloFresh,
    HelloFresh.host(domain="nl"): HelloFresh,
    HelloFresh.host(domain="no"): HelloFresh,
    HelloFresh.host(domain="se"): HelloFresh,
    HerseyLand.host(): HerseyLand,
    HomeChef.host(): HomeChef,
    Hostthetoast.host(): Hostthetoast,
    Ica.host(): Ica,
    ImWorthy.host(): ImWorthy,
    IG.host(): IG,
    IndianHealthyRecipes.host(): IndianHealthyRecipes,
    Innit.host(): Innit,
    InsanelyGoodRecipes.host(): InsanelyGoodRecipes,
    Inspiralized.host(): Inspiralized,
    IzzyCooking.host(): IzzyCooking,
    JamieOliver.host(): JamieOliver,
    JimCooksFoodGood.host(): JimCooksFoodGood,
    Joyfoodsunshine.host(): Joyfoodsunshine,
    JulieGoodwin.host(): JulieGoodwin,
    JustATaste.host(): JustATaste,
    JustBento.host(): JustBento,
    JustOneCookbook.host(): JustOneCookbook,
    KennyMcGovern.host(): KennyMcGovern,
    KeukenLiefdeNL.host(): KeukenLiefdeNL,
    KingArthur.host(): KingArthur,
    KitchenStories.host(): KitchenStories,
    Kochbar.host(): Kochbar,
    Kochbucher.host(): Kochbucher,
    Koket.host(): Koket,
    KptnCook.host(): KptnCook,
    KptnCook.host(subdomain="sharing"): KptnCook,
    KuchniaDomowa.host(): KuchniaDomowa,
    KwestiaSmaku.host(): KwestiaSmaku,
    LAtelierDeRoxane.host(): LAtelierDeRoxane,
    LeCremeDeLaCrumb.host(): LeCremeDeLaCrumb,
    LeanAndGreenRecipes.host(): LeanAndGreenRecipes,
    Lecker.host(): Lecker,
    LekkerEnSimpel.host(): LekkerEnSimpel,
    Leukerecepten.host(): Leukerecepten,
    LifestyleOfAFoodie.host(): LifestyleOfAFoodie,
    LittleSpiceJar.host(): LittleSpiceJar,
    LivelyTable.host(): LivelyTable,
    Lovingitvegan.host(): Lovingitvegan,
    Maangchi.host(): Maangchi,
    MadensVerden.host(): MadensVerden,
    MadeWithLau.host(): MadeWithLau,
    Madsvin.host(): Madsvin,
    MarleySpoon.host(): MarleySpoon,
    MarleySpoon.host(domain="de"): MarleySpoon,
    MarleySpoon.host(domain="com.au"): MarleySpoon,
    MarleySpoon.host(domain="be"): MarleySpoon,
    MarleySpoon.host(domain="nl"): MarleySpoon,
    MarleySpoon.host(domain="at"): MarleySpoon,
    MarleySpoon.host(domain="se"): MarleySpoon,
    Marmiton.host(): Marmiton,
    MarthaStewart.host(): MarthaStewart,
    Matprat.host(): Matprat,
    Meljoulwan.host(): Meljoulwan,
    MelsKitchenCafe.host(): MelsKitchenCafe,
    Mindmegette.host(): Mindmegette,
    Minimalistbaker.host(): Minimalistbaker,
    MinistryOfCurry.host(): MinistryOfCurry,
    Misya.host(): Misya,
    Mob.host(domain="mob.co.uk"): Mob,
    Mob.host(domain="mobkitchen.co.uk"): Mob,
    MomsWithCrockPots.host(): MomsWithCrockPots,
    MonsieurCuisine.host(): MonsieurCuisine,
    MotherThyme.host(): MotherThyme,
    MyBakingAddiction.host(): MyBakingAddiction,
    MyKitchen101.host(): MyKitchen101,
    MyKitchen101en.host(): MyKitchen101en,
    MyRecipes.host(): MyRecipes,
    NRKMat.host(): NRKMat,
    NibbleDish.host(): NibbleDish,
    NHSHealthierFamilies.host(): NHSHealthierFamilies,
    NIHHealthyEating.host(): NIHHealthyEating,
    NYTimes.host(): NYTimes,
    NoRecipes.host(): NoRecipes,
    NourishedByNutrition.host(): NourishedByNutrition,
    Number2Pencil.host(): Number2Pencil,
    NutritionByNathalie.host(): NutritionByNathalie,
    OhSheGlows.host(): OhSheGlows,
    OmnivoresCookbook.host(): OmnivoresCookbook,
    OnceUponAChef.host(): OnceUponAChef,
    OneHundredOneCookBooks.host(): OneHundredOneCookBooks,
    OwenHan.host(): OwenHan,
    PaleoRunningMomma.host(): PaleoRunningMomma,
    Panelinha.host(): Panelinha,
    PaniniHappy.host(): PaniniHappy,
    PersnicketyPlates.host(): PersnicketyPlates,
    PickUpLimes.host(): PickUpLimes,
    PingoDoce.host(): PingoDoce,
    PinkOwlKitchen.host(): PinkOwlKitchen,
    PlatingPixels.host(): PlatingPixels,
    PlowingThroughLife.host(): PlowingThroughLife,
    PopSugar.host(): PopSugar,
    PracticalSelfReliance.host(): PracticalSelfReliance,
    PracticalSelfReliance.host(domain="creativecanning.com"): PracticalSelfReliance,
    PressureLuckCooking.host(): PressureLuckCooking,
    PrimalEdgeHealth.host(): PrimalEdgeHealth,
    ProjectGezond.host(): ProjectGezond,
    Przepisy.host(): Przepisy,
    PurelyPope.host(): PurelyPope,
    PurpleCarrot.host(): PurpleCarrot,
    RachlMansfield.host(): RachlMansfield,
    RainbowPlantLife.host(): RainbowPlantLife,
    RealFoodTesco.host(): RealFoodTesco,
    RealSimple.host(): RealSimple,
    ReceitasNestleBR.host(): ReceitasNestleBR,
    RecipeRunner.host(): RecipeRunner,
    RecipeTinEats.host(): RecipeTinEats,
    RedHouseSpice.host(): RedHouseSpice,
    Reishunger.host(): Reishunger,
    Rezeptwelt.host(): Rezeptwelt,
    Ricetta.host(): Ricetta,
    RosannaPansino.host(): RosannaPansino,
    RutgerBakt.host(): RutgerBakt,
    SaboresAnjinomoto.host(): SaboresAnjinomoto,
    SallysBakingAddiction.host(): SallysBakingAddiction,
    SallysBlog.host(): SallysBlog,
    SaltPepperSkillet.host(): SaltPepperSkillet,
    Saveur.host(): Saveur,
    SeriousEats.host(): SeriousEats,
    SimpleVeganista.host(): SimpleVeganista,
    SimplyCookit.host(): SimplyCookit,
    SimplyQuinoa.host(): SimplyQuinoa,
    SimplyRecipes.host(): SimplyRecipes,
    SimplyWhisked.host(): SimplyWhisked,
    SkinnyTaste.host(): SkinnyTaste,
    Smulweb.host(): Smulweb,
    SoBors.host(): SoBors,
    SouthernCastIron.host(): SouthernCastIron,
    SouthernLiving.host(): SouthernLiving,
    SpendWithPennies.host(): SpendWithPennies,
    Springlane.host(): Springlane,
    StaySnatched.host(): StaySnatched,
    SteamyKitchen.host(): SteamyKitchen,
    StreetKitchen.host(): StreetKitchen,
    SunBasket.host(): SunBasket,
    SundPaaBudget.host(): SundPaaBudget,
    Sunset.host(): Sunset,
    SweetCsDesigns.host(): SweetCsDesigns,
    SweetPeasAndSaffron.host(): SweetPeasAndSaffron,
    TasteAU.host(): TasteAU,
    TasteOfHome.host(): TasteOfHome,
    TastesBetterFromScratch.host(): TastesBetterFromScratch,
    TastesOfLizzyT.host(): TastesOfLizzyT,
    Tasty.host(): Tasty,
    TastyKitchen.host(): TastyKitchen,
    TheCleverCarrot.host(): TheCleverCarrot,
    TheExpertGuides.host(): TheExpertGuides,
    TheHappyFoodie.host(): TheHappyFoodie,
    TheKitchenCommunity.host(): TheKitchenCommunity,
    TheKitchenMagPie.host(): TheKitchenMagPie,
    TheKitchn.host(): TheKitchn,
    TheMagicalSlowCooker.host(): TheMagicalSlowCooker,
    TheModernProper.host(): TheModernProper,
    ThePioneerWoman.host(): ThePioneerWoman,
    TheSpruceEats.host(): TheSpruceEats,
    TheVintageMixer.host(): TheVintageMixer,
    Therecipecritic.host(): Therecipecritic,
    Thewoksoflife.host(): Thewoksoflife,
    TidyMom.host(): TidyMom,
    TimesOfIndia.host(): TimesOfIndia,
    TineNo.host(): TineNo,
    Tofoo.host(): Tofoo,
    TudoGostoso.host(): TudoGostoso,
    TwoPeasAndTheirPod.host(): TwoPeasAndTheirPod,
    USAPears.host(): USAPears,
    USDAMyPlate.host(): USDAMyPlate,
    Valdemarsro.host(): Valdemarsro,
    VanillaAndBean.host(): VanillaAndBean,
    VegRecipesOfIndia.host(): VegRecipesOfIndia,
    Vegetarbloggen.host(): Vegetarbloggen,
    Vegolosi.host(): Vegolosi,
    Waitrose.host(): Waitrose,
    WatchWhatUEat.host(): WatchWhatUEat,
    WeAreNotMartha.host(): WeAreNotMartha,
    WeightWatchers.host(): WeightWatchers,
    WeightWatchersPublic.host(): WeightWatchersPublic,
    WellPlated.host(): WellPlated,
    WhatsGabyCooking.host(): WhatsGabyCooking,
    Whole30.host(): Whole30,
    WholeFoods.host(): WholeFoods,
    WholeFoods.host(domain="co.uk"): WholeFoods,
    WilliamsSonoma.host(): WilliamsSonoma,
    WomensWeeklyFood.host(): WomensWeeklyFood,
    Woop.host(): Woop,
    WikiCookbook.host(): WikiCookbook,
    Woolworths.host(): Woolworths,
    Yemek.host(): Yemek,
    Yummly.host(): Yummly,
    ZauberTopf.host(): ZauberTopf,
    ZeitWochenmarkt.host(): ZeitWochenmarkt,
    ZenBelly.host(): ZenBelly,
    GesundAktiv.host(): GesundAktiv,
    UitPaulinesKeukenNL.host(): UitPaulinesKeukenNL,
}
def get_supported_urls() -> set[str]:
    """Return a new set holding every key registered in ``SCRAPERS``."""
    # Iterating a dict yields its keys, so no explicit .keys() view is needed.
    return set(SCRAPERS)
def scraper_exists_for(url_path: str) -> bool:
    """
    Report whether a scraper is registered for the host of the given URL.

    Args:
        url_path (str): URL whose host name is extracted with ``get_host_name``.

    Returns:
        bool: True if the extracted host name is a key of ``SCRAPERS``.
    """
    # Test membership on the registry itself rather than copying all of its
    # keys into a throwaway set on every call.
    return get_host_name(url_path) in SCRAPERS
def scrape_me(url_path: str, **options: Any) -> AbstractScraper:
    """
    Return a scraper for the recipe at the given URL.

    The host name extracted from url_path selects a scraper class from SCRAPERS.
    If no class is registered for that host and a truthy wild_mode option was
    passed, a scraper is generated by SchemaScraperFactory instead.

    Args:
        url_path (str): URL of the recipe page.
        **options: Keyword options forwarded to the scraper. Passing any option
            is deprecated and emits a DeprecationWarning.

    Raises:
        WebsiteNotImplementedError: If no scraper is registered for the host and
            wild_mode is not enabled.
        NoSchemaFoundInWildMode: If the wild mode scraper has no schema data.

    Returns:
        AbstractScraper: a scraper instance for the requested website.
    """
    host_name = get_host_name(url_path)

    if options:
        msg = (
            "Scraper options arguments (e.g. proxies=, timeout=) are deprecated, and "
            "support for them will be dropped in future. To migrate, please:\n"
            "\n"
            "  * Use an HTTP client (such as 'requests' or 'httpx') configured with "
            "the proxies/timeout settings you want.\n"
            "  * Retrieve recipe HTML using the appropriately-configured HTTP client.\n"
            "  * Scrape retrieved recipe HTML using the 'recipe_scrapers.scrape_html' "
            "function.\n"
        )
        # stacklevel=2 attributes the warning to the code calling scrape_me, so
        # it is reported against the caller's line instead of this module.
        warnings.warn(msg, DeprecationWarning, stacklevel=2)

    scraper = SCRAPERS.get(host_name)
    if scraper is not None:
        return scraper(url_path, **options)

    # Unregistered host: wild_mode is consumed here so that it is not forwarded
    # to the factory along with the remaining options.
    if not options.pop("wild_mode", False):
        raise WebsiteNotImplementedError(host_name)

    wild_scraper = SchemaScraperFactory.generate(url_path, **options)
    if not wild_scraper.schema.data:
        raise NoSchemaFoundInWildMode(url_path)
    return wild_scraper
def scrape_html(
    html: str, org_url: str | None = None, **options: Any
) -> AbstractScraper:
    """
    Takes a string of HTML and returns a scraper object. If the org_url is specified,
    then the scraper will use that URL to resolve a defined scraper, otherwise it will
    fall back to wild mode. If no schema is found in wild mode then a
    NoSchemaFoundInWildMode exception will be raised.

    Args:
        html (str): Raw HTML in text form.
        org_url (str, optional): Original URL of the HTML. Defaults to None.
        **options: Keyword options forwarded to the scraper. Passing any option
            is deprecated and emits a DeprecationWarning.

    Raises:
        NoSchemaFoundInWildMode: If no schema is found in wild mode.

    Returns:
        AbstractScraper: a scraper instance implementing AbstractScraper for the requested website.
    """
    host_name = get_host_name(org_url) if org_url is not None else None

    if options:
        msg = (
            "Scraper options arguments (e.g. proxies=, timeout=) are deprecated, and "
            "support for them will be dropped in future. To migrate, please:\n"
            "\n"
            "  * Use an HTTP client (such as 'requests' or 'httpx') configured with "
            "the proxies/timeout settings you want.\n"
            "  * Retrieve recipe HTML using the appropriately-configured HTTP client.\n"
        )
        # stacklevel=2 attributes the warning to the code calling scrape_html, so
        # it is reported against the caller's line instead of this module.
        warnings.warn(msg, DeprecationWarning, stacklevel=2)

    scraper = None
    if host_name:
        # An unregistered host is not an error here; it means wild mode is used.
        with contextlib.suppress(KeyError):
            scraper = SCRAPERS[host_name]

    if not scraper:
        wild_scraper = SchemaScraperFactory.generate(url=org_url, html=html, **options)

        if not wild_scraper.schema.data:
            raise NoSchemaFoundInWildMode(org_url)

        return wild_scraper

    return scraper(url=org_url, html=html, **options)
# Names exported by a star-import of this package.
__all__ = ["scrape_me", "scrape_html"]
# Package name exposed as a module-level attribute; not read anywhere in this module.
name = "recipe_scrapers"