Add Kohls data to database

Place data in the database when scraped

Also include loading credentials from config files
This commit is contained in:
NMerz 2020-10-18 14:10:41 -04:00
parent 8e6dbac183
commit 7dbea8f805
2 changed files with 58 additions and 22 deletions

2
.gitignore vendored
View File

@ -90,3 +90,5 @@ Lambdas/Lists/target/classes/META-INF/Lists.kotlin_module
Listify/app/src/main/res/raw/auths.json
Lambdas/Lists/target/surefire-reports/TestInputUtils.txt
Lambdas/Lists/target/surefire-reports/TEST-TestInputUtils.xml
Lambdas/Scraping/scraperConfigs.json
Lambdas/Scraping/dbConfigs.json

View File

@ -1,7 +1,16 @@
import json
import time
import traceback

import pymysql.cursors
import requests
# Load the scraping API key from scraperConfigs.json (listed in .gitignore)
# instead of hard-coding the credential in this script.
with open("scraperConfigs.json", "r") as scraper_configs_file:
    scraper_configs = json.load(scraper_configs_file)

# Request headers sent with the scraping API call below.
headers = {
    "apikey": scraper_configs["apikey"],
}
params = (
@ -9,29 +18,54 @@ params = (
("location","na"),
);
# Fetch the page through the Zenscrape API, passing the API key in `headers`
# and the request options in `params` (both defined above).
response = requests.get("https://app.zenscrape.com/api/v1/get", headers=headers, params=params)
# Fail loudly on an HTTP error rather than parsing an error page as if it were
# the product listing (which would silently yield zero products).
response.raise_for_status()
print(response.text)
soup = BeautifulSoup(response.text, "html.parser")
# chainID written with every scraped row.
# NOTE(review): presumably the Kohl's row in the chain table - confirm against the schema.
KOHLS_CHAIN_ID = 3
# Prefix shared by every product image URL on the scraped page.
IMG_URL_BASE = "https://media.kohlsimg.com/is/image/kohls/"

# Rows for the Product insert: (chainID, description, price, imageURL).
insert_params = []
for match in soup.find_all(id=re.compile(".*_prod_price")):
    price = None
    description_words = []
    for section in match.text.split():
        if "$" in section:
            # A price token restarts the description, so only the words that
            # follow the last price token survive; the first price token wins.
            description_words = []
            if price is None:
                price = section
            continue
        # Drop parenthesised tokens; they are not part of the description.
        if "(" in section or ")" in section:
            continue
        description_words.append(section)
    description = " ".join(description_words)

    if price is None:
        # Without a "$" token there is nothing to store; skip the element
        # instead of crashing the whole run on `None + str`.
        print("Skipping element without a price: " + description)
        continue
    try:
        # Strip thousands separators so values such as "$1,299.99" parse.
        price_value = float(price.split("$")[1].replace(",", ""))
    except ValueError:
        print("Skipping element with unparsable price: " + price)
        continue

    # Look for the product image among the preceding siblings. There is no
    # early exit, so the last matching sibling visited determines the URL.
    imgUrl = ""
    for prior in match.previous_siblings:
        prior_html = str(prior)
        if IMG_URL_BASE in prior_html:
            # Keep the path up to the query string / closing attribute quote.
            imgUrl = IMG_URL_BASE + prior_html.split(IMG_URL_BASE)[1].split("?")[0].split('"')[0]

    print(price + " for: " + description + " @: " + imgUrl)
    insert_params.append((KOHLS_CHAIN_ID, description, price_value, imgUrl))
# Load the database credentials from dbConfigs.json (listed in .gitignore).
with open("dbConfigs.json", "r") as db_configs_file:
    db_configs = json.load(db_configs_file)

# Parameterised insert; one tuple of `insert_params` supplies each row.
PRODUCT_INSERT_SYNTAX = (
    "INSERT INTO Product (chainID, description, price, imageURL) "
    "VALUES (%s, %s, %s, %s);"
)

connection = pymysql.connect(
    host=db_configs["host"],
    user=db_configs["user"],
    password=db_configs["password"],
    # `database` is the documented keyword; `db` is a deprecated alias.
    database=db_configs["db_name"],
    charset="utf8mb4",
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    with connection.cursor() as cursor:
        cursor.executemany(PRODUCT_INSERT_SYNTAX, insert_params)
    # Commit explicitly so the batch is persisted before the connection closes.
    connection.commit()
except Exception as e:
    # Best-effort script: report the failure and fall through to the cleanup.
    # The uncommitted batch is discarded when the connection closes.
    print(e)
    traceback.print_exc()
finally:
    connection.close()