Chaining, retries, and general scraping durability

Make the scraping infrastructure more durable, since the scraping service often fails to deliver.
This commit is contained in:
NMerz
2020-11-01 11:30:03 -05:00
parent 79598bf9e9
commit ee04a06e0e
21 changed files with 74 additions and 19 deletions

View File

@@ -1,11 +1,15 @@
import json
import boto3
import time
def lambda_handler(event, context):
with open("words.txt") as words_file:
list_num = event["list_num"]
with open("prefix_list_part" + str(list_num) + ".txt") as words_file:
words = json.load(words_file)
print(words)
for word in words:
time.sleep(6)
client = boto3.client('lambda')
response = client.invoke(
FunctionName='KohlsScraper',
@@ -13,7 +17,16 @@ def lambda_handler(event, context):
LogType="None",
Payload= """{"toScrape": \"""" + word + "\"}"
)
if (event["linked"]):
if list_num < 16:
time.sleep(200)
client.invoke(
FunctionName='RunOrchestrator',
InvocationType="Event",
LogType="None",
Payload= "{\"list_num\": "+ str(list_num + 1) + ",\"linked\": true}"
)
return {
'statusCode': 200,
'body': json.dumps('Hello from Lambda!')
}
}