mirror of
https://github.com/ClaytonWWilson/Listify.git
synced 2025-12-15 18:28:47 +00:00
Add Kohls data to database
Place data in the database when scraped. Also include loading credentials from config files.
This commit is contained in:
parent
8e6dbac183
commit
7dbea8f805
2
.gitignore
vendored
2
.gitignore
vendored
@ -90,3 +90,5 @@ Lambdas/Lists/target/classes/META-INF/Lists.kotlin_module
|
|||||||
Listify/app/src/main/res/raw/auths.json
|
Listify/app/src/main/res/raw/auths.json
|
||||||
Lambdas/Lists/target/surefire-reports/TestInputUtils.txt
|
Lambdas/Lists/target/surefire-reports/TestInputUtils.txt
|
||||||
Lambdas/Lists/target/surefire-reports/TEST-TestInputUtils.xml
|
Lambdas/Lists/target/surefire-reports/TEST-TestInputUtils.xml
|
||||||
|
Lambdas/Scraping/scraperConfigs.json
|
||||||
|
Lambdas/Scraping/dbConfigs.json
|
||||||
|
|||||||
@ -1,7 +1,16 @@
|
|||||||
import requests
|
import requests
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pymysql.cursors
|
||||||
|
import time
|
||||||
|
|
||||||
|
scraper_configs = None
|
||||||
|
with open("scraperConfigs.json", "r") as scraper_configs_file:
|
||||||
|
scraper_configs = json.load(scraper_configs_file)
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
"apikey": ""
|
"apikey": scraper_configs["apikey"]
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
params = (
|
params = (
|
||||||
@ -9,29 +18,54 @@ params = (
|
|||||||
("location","na"),
|
("location","na"),
|
||||||
);
|
);
|
||||||
|
|
||||||
response = requests.get('https://app.zenscrape.com/api/v1/get', headers=headers, params=params);
|
response = requests.get("https://app.zenscrape.com/api/v1/get", headers=headers, params=params);
|
||||||
print(response.text)
|
print(response.text)
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, 'html.parser')
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
|
|
||||||
|
insert_params = []
|
||||||
|
|
||||||
for match in soup.find_all(id=re.compile(".*_prod_price")):
|
for match in soup.find_all(id=re.compile(".*_prod_price")):
|
||||||
price = None
|
price = None
|
||||||
description = ""
|
description = ""
|
||||||
match_split = match.text.split()
|
match_split = match.text.split()
|
||||||
for section in match_split:
|
for section in match_split:
|
||||||
if '$' in section:
|
if '$' in section:
|
||||||
description = ""
|
description = ""
|
||||||
if price == None:
|
if price == None:
|
||||||
price = section
|
price = section
|
||||||
continue
|
continue
|
||||||
if ('(' in section) or (')' in section):
|
if ('(' in section) or (')' in section):
|
||||||
continue
|
continue
|
||||||
description += section + " "
|
description += section + " "
|
||||||
description = description.strip()
|
description = description.strip()
|
||||||
imgUrl = ""
|
imgUrl = ""
|
||||||
imgUrlBase = 'https://media.kohlsimg.com/is/image/kohls/'
|
imgUrlBase = "https://media.kohlsimg.com/is/image/kohls/"
|
||||||
for prior in match.previous_siblings:
|
for prior in match.previous_siblings:
|
||||||
if imgUrlBase in str(prior):
|
if imgUrlBase in str(prior):
|
||||||
imgUrl = imgUrlBase + str(prior).split(imgUrlBase)[1].split('?')[0].split('"')[0]
|
imgUrl = imgUrlBase + str(prior).split(imgUrlBase)[1].split('?')[0].split('"')[0]
|
||||||
print(price + " for: " + description + " @: " + imgUrl)
|
print(price + " for: " + description + " @: " + imgUrl)
|
||||||
|
insert_params.append((3, description, float(price.split('$')[1]), imgUrl))
|
||||||
|
|
||||||
|
db_configs = None
|
||||||
|
with open("dbConfigs.json", "r") as db_configs_file:
|
||||||
|
db_configs = json.load(db_configs_file)
|
||||||
|
|
||||||
|
|
||||||
|
connection = pymysql.connect(host=db_configs["host"],
|
||||||
|
user=db_configs["user"],
|
||||||
|
password=db_configs["password"],
|
||||||
|
db=db_configs["db_name"],
|
||||||
|
charset='utf8mb4',
|
||||||
|
cursorclass=pymysql.cursors.DictCursor)
|
||||||
|
|
||||||
|
try:
|
||||||
|
with connection.cursor() as cursor:
|
||||||
|
PRODUCT_INSERT_SYNTAX = "INSERT INTO Product (chainID, description, price, imageURL) VALUES (%s, %s, %s, %s);"
|
||||||
|
cursor.executemany(PRODUCT_INSERT_SYNTAX, insert_params)
|
||||||
|
connection.commit()
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
traceback.print_exc()
|
||||||
|
finally:
|
||||||
|
connection.close()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user