mirror of
https://github.com/ClaytonWWilson/Listify.git
synced 2026-03-10 18:55:03 +00:00
Chaining, retries, and general scraping durability
Make the scraping infrastructure more durable, since the scraping service often fails to deliver.
This commit is contained in:
@@ -1,11 +1,15 @@
|
||||
import json
|
||||
import boto3
|
||||
import time
|
||||
|
||||
|
||||
def lambda_handler(event, context):
|
||||
with open("words.txt") as words_file:
|
||||
list_num = event["list_num"]
|
||||
with open("prefix_list_part" + str(list_num) + ".txt") as words_file:
|
||||
words = json.load(words_file)
|
||||
print(words)
|
||||
for word in words:
|
||||
time.sleep(6)
|
||||
client = boto3.client('lambda')
|
||||
response = client.invoke(
|
||||
FunctionName='KohlsScraper',
|
||||
@@ -13,7 +17,16 @@ def lambda_handler(event, context):
|
||||
LogType="None",
|
||||
Payload= """{"toScrape": \"""" + word + "\"}"
|
||||
)
|
||||
if (event["linked"]):
|
||||
if list_num < 16:
|
||||
time.sleep(200)
|
||||
client.invoke(
|
||||
FunctionName='RunOrchestrator',
|
||||
InvocationType="Event",
|
||||
LogType="None",
|
||||
Payload= "{\"list_num\": "+ str(list_num + 1) + ",\"linked\": true}"
|
||||
)
|
||||
return {
|
||||
'statusCode': 200,
|
||||
'body': json.dumps('Hello from Lambda!')
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user