Lambdaify Kohls scraping

Move to Lambda function setup and add orchestration and word lists for full runs. Credit for the word lists goes to: http://www.desiquintans.com/nounlist
2026-03-10 18:55:03 +00:00 · 2020-10-18 17:51:08 -04:00
parent 7dbea8f805
commit 79598bf9e9
9 changed files with 6927 additions and 55 deletions
--- a/Lambdas/Scraping/prefix_list_builder.py
+++ b/Lambdas/Scraping/prefix_list_builder.py
@@ -0,0 +1,25 @@
+wordlist = []
+with open("nounlist.txt") as nounlist:
+  for noun in nounlist:
+    wordlist.append(noun)
+
+prefix_list = []
+for word in wordlist:
+  prefix_list.append(word[:min(len(word), 3)])
+
+short_list = []
+short_list2 = []
+for prefix in prefix_list:
+  prefix = prefix.strip()
+  if len(short_list) < 700:
+    if (len(short_list) == 0 or short_list[-1] != prefix):
+      short_list.append(prefix)
+  else:
+    if ((len(short_list2) == 0 or short_list2[-1] != prefix) and short_list[-1] != prefix):
+      short_list2.append(prefix)
+
+with open("prefix_list_part1.txt", "w") as prefix_list_part1:
+  json.dump(short_list, prefix_list_part1)
+
+with open("prefix_list_part2.txt", "w") as prefix_list_part2:
+  json.dump(short_list2, prefix_list_part2)