From 8e6dbac183e1d88dde196f975f9160560443033a Mon Sep 17 00:00:00 2001
From: NMerz <nmerz@icloud.com>
Date: Sat, 17 Oct 2020 22:56:25 -0400
Subject: [PATCH] Kohl's scraping outline

Create an outline with some of the initial work needed to pull in data from Kohl's for populating the database: specifically, a non-durable extraction prototype and integration with a web-requesting/scraping service.
---
 Lambdas/Scraping/KohlsScraper.py | 37 ++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 Lambdas/Scraping/KohlsScraper.py

diff --git a/Lambdas/Scraping/KohlsScraper.py b/Lambdas/Scraping/KohlsScraper.py
new file mode 100644
index 0000000..f01fc9d
--- /dev/null
+++ b/Lambdas/Scraping/KohlsScraper.py
@@ -0,0 +1,37 @@
+import re
+
+import requests
+from bs4 import BeautifulSoup
+
+headers = {"apikey": ""}  # Zenscrape API key; left blank in this outline
+params = (
+    ("url", "https://www.kohls.com/search.jsp?submit-search=web-regular&search=shoes"),
+    ("location", "na"),
+)
+
+# Fetch the Kohl's search results page through the Zenscrape API and dump the raw HTML.
+response = requests.get('https://app.zenscrape.com/api/v1/get', headers=headers, params=params)
+print(response.text)
+soup = BeautifulSoup(response.text, 'html.parser')
+
+# Each element whose id contains "_prod_price" is parsed for a price, description and image URL.
+for match in soup.find_all(id=re.compile(".*_prod_price")):
+    price = None
+    description = ""
+    for section in match.text.split():
+        if '$' in section:
+            description = ""  # drop any words collected before this price token
+            if price is None:  # only the first '$' token is kept as the price
+                price = section
+            continue
+        if ('(' in section) or (')' in section):
+            continue
+        description += section + " "
+    description = description.strip()
+    imgUrl = ""
+    imgUrlBase = 'https://media.kohlsimg.com/is/image/kohls/'
+    for prior in match.previous_siblings:
+        if imgUrlBase in str(prior):
+            imgUrl = imgUrlBase + str(prior).split(imgUrlBase)[1].split('?')[0].split('"')[0]
+    print(f"{price} for: {description} @: {imgUrl}")  # price stays None if no '$' token was seen
+