Kohl's scraping outline

Create an outline with some of the initial work needed to pull in data from Kohl's for populating the database: specifically, a non-durable extraction prototype and integration with a web requesting/scraping service.
2026-03-10 18:55:03 +00:00 · 2020-10-17 22:56:25 -04:00
parent 8fa081e4b7
commit 8e6dbac183
1 changed files with 37 additions and 0 deletions
--- a/Lambdas/Scraping/KohlsScraper.py
+++ b/Lambdas/Scraping/KohlsScraper.py
@@ -0,0 +1,37 @@
 import os
 import re

 import requests
 from bs4 import BeautifulSoup
 # Prototype scraper: fetches a Kohl's search-results page through the
 # Zenscrape proxy API and prints one "price / description / image" line for
 # every element whose id matches "*_prod_price".

 ZENSCRAPE_ENDPOINT = 'https://app.zenscrape.com/api/v1/get'
 SEARCH_URL = "https://www.kohls.com/search.jsp?submit-search=web-regular&search=shoes"
 IMAGE_URL_BASE = 'https://media.kohlsimg.com/is/image/kohls/'
 # Without a timeout, requests waits indefinitely on a stalled connection.
 REQUEST_TIMEOUT_SECONDS = 60
 # Shown in place of the price when a price element contains no '$' token.
 MISSING_PRICE = "N/A"


 def parse_price_block(text):
     """Split the text of a price element into ``(price, description)``.

     The text is tokenised on whitespace.  The price is the first token
     containing ``$`` (``None`` when there is none).  The description is made
     of the tokens that follow the *last* ``$`` token, skipping any token
     that contains a parenthesis, joined by single spaces.
     """
     price = None
     words = []
     for token in text.split():
         if '$' in token:
             # Every price token restarts the description, so only the text
             # after the final price survives.
             words.clear()
             if price is None:
                 price = token
             continue
         if '(' in token or ')' in token:
             continue
         words.append(token)
     return price, " ".join(words)


 def extract_image_url(price_tag):
     """Return the product image URL found among ``price_tag``'s previous siblings.

     Each sibling is rendered with ``str()`` and searched for
     ``IMAGE_URL_BASE``; the URL is cut at the first ``?`` or ``"``.  Returns
     ``""`` when no sibling mentions the image host.
     """
     img_url = ""
     for sibling in price_tag.previous_siblings:
         markup = str(sibling)
         if IMAGE_URL_BASE in markup:
             # NOTE(review): no early exit, so the last matching sibling in
             # iteration order wins -- confirm this is the intended image.
             tail = markup.split(IMAGE_URL_BASE)[1]
             img_url = IMAGE_URL_BASE + tail.split('?')[0].split('"')[0]
     return img_url


 def format_listing(price, description, img_url):
     """Build the output line for one product; a ``None`` price is shown as N/A."""
     shown_price = MISSING_PRICE if price is None else price
     return f"{shown_price} for: {description} @: {img_url}"


 def main():
     """Fetch the search page via Zenscrape and print every product found.

     Raises:
         requests.HTTPError: if the scraping service answers with an error
             status, instead of silently parsing an error page.
     """
     # The key is read from the environment so it is never committed; the
     # fallback keeps the previous behaviour of sending an empty key.
     headers = {"apikey": os.environ.get("ZENSCRAPE_API_KEY", "")}
     params = (
         ("url", SEARCH_URL),
         ("location", "na"),
     )
     response = requests.get(
         ZENSCRAPE_ENDPOINT,
         headers=headers,
         params=params,
         timeout=REQUEST_TIMEOUT_SECONDS,
     )
     response.raise_for_status()
     # Raw page dump, kept from the prototype for debugging.
     print(response.text)

     soup = BeautifulSoup(response.text, 'html.parser')
     for match in soup.find_all(id=re.compile(r".*_prod_price")):
         price, description = parse_price_block(match.text)
         print(format_listing(price, description, extract_image_url(match)))


 if __name__ == "__main__":
     main()