mirror of
https://github.com/ClaytonWWilson/Scraper-for-theTVDB.com.git
synced 2025-12-13 00:58:47 +00:00
Added progress bars to the download operations.
This commit is contained in:
parent
dc27ff581f
commit
fc1a0d1963
@ -4,7 +4,7 @@ import datetime
|
||||
import requests
|
||||
import dateutil.parser
|
||||
|
||||
from utils import clearScreen
|
||||
from utils import clear_screen
|
||||
|
||||
|
||||
def login():
|
||||
@ -113,7 +113,7 @@ def refreshToken():
|
||||
obj.close()
|
||||
print("\nNew token acquired!\n")
|
||||
break
|
||||
clearScreen()
|
||||
clear_screen()
|
||||
else:
|
||||
login["TOKEN"] = getToken(LOGIN_DATA)
|
||||
login["TIMESTAMP"] = str(datetime.datetime.now().replace(tzinfo=None))
|
||||
|
||||
10
launcher.py
10
launcher.py
@ -2,13 +2,13 @@ from authentication import login
|
||||
from main import download
|
||||
from main import wait
|
||||
from utils import clear_downloads
|
||||
from utils import clearScreen
|
||||
from utils import clear_screen
|
||||
from search import search
|
||||
|
||||
# TODO fix naming convention for all variables and functions
|
||||
|
||||
while True:
|
||||
clearScreen()
|
||||
clear_screen()
|
||||
print("=============================\n"
|
||||
"Image fetcher for theTVDB.com\n"
|
||||
"=============================\n")
|
||||
@ -23,16 +23,16 @@ while True:
|
||||
choice = input("> ").lower().strip()
|
||||
|
||||
if choice == "1": # TODO catch KeyboardInterrupt at search
|
||||
series = search()
|
||||
series = search() # BUG Searching for 'one punc' causes a keyerror when reading the 'data' key
|
||||
if series != None:
|
||||
download(series)
|
||||
wait()
|
||||
elif choice == "2":
|
||||
clearScreen()
|
||||
clear_screen()
|
||||
clear_downloads()
|
||||
wait()
|
||||
elif choice == "3": # TODO add a printout that tells the user who is currently logged in
|
||||
clearScreen()
|
||||
clear_screen()
|
||||
login()
|
||||
wait()
|
||||
elif choice == "4":
|
||||
|
||||
96
main.py
96
main.py
@ -6,9 +6,11 @@ import dateutil
|
||||
from bs4 import BeautifulSoup
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from utils import APIConnector
|
||||
from utils import create_file_name
|
||||
from utils import ProgressBar
|
||||
|
||||
|
||||
# TODO add counters for number of images downloaded and deleted
|
||||
@ -31,6 +33,8 @@ def download(series):
|
||||
# Create series folder
|
||||
os.makedirs(os.path.join("downloads", series.folder_name), exist_ok=True)
|
||||
|
||||
print("Downloading data for " + series.name)
|
||||
|
||||
api_con = APIConnector()
|
||||
|
||||
# Download series text data to info.json
|
||||
@ -42,6 +46,8 @@ def download(series):
|
||||
with open(info_path, 'wb') as f:
|
||||
f.write(res.content)
|
||||
|
||||
|
||||
|
||||
# Make a folder for actors
|
||||
actors_folder_path = os.path.join("downloads", series.folder_name, "actors")
|
||||
os.makedirs(actors_folder_path)
|
||||
@ -59,10 +65,23 @@ def download(series):
|
||||
actors_profile_folder_path = os.path.join("downloads", series.folder_name, "actors", "profiles")
|
||||
os.makedirs(actors_profile_folder_path)
|
||||
|
||||
# Count the number of actor profile pictures that will be downloaded
|
||||
amount = 0
|
||||
for actor in json.loads(res.content)["data"]:
|
||||
amount += 1
|
||||
|
||||
# Create a progress bar
|
||||
progress_bar = ProgressBar(amount)
|
||||
|
||||
# Download each actor's profile picture and save it as their name
|
||||
for actor in json.loads(res.content)["data"]:
|
||||
name = create_file_name(actor["name"])
|
||||
# Print progress bar to the screen
|
||||
sys.stdout.write("\rDownloading Actors... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), str(actor["id"]) + ".jpg"))
|
||||
sys.stdout.flush()
|
||||
|
||||
|
||||
name = create_file_name(actor["name"])
|
||||
|
||||
# Check if there is an image for the actor
|
||||
if actor["image"] != "":
|
||||
# print("downloading " + actor["image"])
|
||||
@ -72,6 +91,15 @@ def download(series):
|
||||
else:
|
||||
# Use a default image if one does not exist on theTVDB.com
|
||||
shutil.copyfile(os.path.join("resources", "default_person.jpg"), os.path.join(actors_profile_folder_path, name + '_' + str(actor["id"]) + ".jpg"))
|
||||
|
||||
progress_bar.increment()
|
||||
|
||||
# Print that the operation is done
|
||||
sys.stdout.write("\rDownloading Actors... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Done. "))
|
||||
sys.stdout.flush()
|
||||
print()
|
||||
|
||||
|
||||
|
||||
# Make a folder for episodes
|
||||
episodes_folder_path = os.path.join("downloads", series.folder_name, "episodes")
|
||||
@ -94,14 +122,34 @@ def download(series):
|
||||
with open(os.path.join(episodes_folder_path, "episodes.json"), 'wb') as f:
|
||||
f.write(res.content)
|
||||
|
||||
# Count the number of episode pictures and data that will be downloaded
|
||||
amount = 0
|
||||
for episode in json.loads(res.content)["data"]:
|
||||
amount += 1
|
||||
|
||||
progress_bar = ProgressBar(amount * 2)
|
||||
|
||||
# Separate episode data into individual json files for each episode and download episode thumbnails
|
||||
for episode in json.loads(res.content)["data"]:
|
||||
episode_path = os.path.join(episodes_folder_path, "Season " + str(episode["airedSeason"]), "Episode {} - {}".format(str(episode["airedEpisodeNumber"]), episode["episodeName"]))
|
||||
img_res = requests.get("https://www.thetvdb.com/banners/" + episode["filename"])
|
||||
with open(episode_path + ".json", 'w') as f:
|
||||
# Update progress bar and display it in the terminal
|
||||
sys.stdout.write("\rDownloading Episodes... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Episode {} - {}".format(str(episode["airedEpisodeNumber"]), episode["episodeName"] + ".json ")))
|
||||
sys.stdout.flush()
|
||||
f.write(json.dumps(episode))
|
||||
progress_bar.increment()
|
||||
with open(episode_path + ".jpg", 'wb') as f:
|
||||
# Update progress bar and display it in the terminal
|
||||
sys.stdout.write("\rDownloading Episodes... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Episode {} - {}".format(str(episode["airedEpisodeNumber"]), episode["episodeName"] + ".jpg ")))
|
||||
sys.stdout.flush()
|
||||
f.write(img_res.content)
|
||||
progress_bar.increment()
|
||||
|
||||
sys.stdout.write("\rDownloading Episodes... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Done. "))
|
||||
sys.stdout.flush()
|
||||
print()
|
||||
|
||||
|
||||
# Make a folder for images
|
||||
images_folder_path = os.path.join("downloads", series.folder_name, "images")
|
||||
@ -122,34 +170,80 @@ def download(series):
|
||||
banners_page = requests.get("{}/artwork/banners".format(series.url))
|
||||
banners_soup = BeautifulSoup(banners_page.content, "html.parser")
|
||||
|
||||
# Make a progress bar for the banners
|
||||
amount = 0
|
||||
for image in banners_soup.find_all("img", attrs={"class": "media-object img-responsive"}):
|
||||
amount += 1
|
||||
|
||||
progress_bar = ProgressBar(amount)
|
||||
|
||||
|
||||
counter = 0
|
||||
for image in banners_soup.find_all("img", attrs={"class":"media-object img-responsive"}):
|
||||
sys.stdout.write("\rDownloading Banners... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "{:03d}.jpg".format(counter)))
|
||||
sys.stdout.flush()
|
||||
|
||||
image_res = requests.get(image["src"])
|
||||
with open(os.path.join(banners_folder_path, "{:03d}.jpg".format(counter)), 'wb') as f:
|
||||
f.write(image_res.content)
|
||||
counter+=1
|
||||
progress_bar.increment()
|
||||
|
||||
sys.stdout.write("\rDownloading Banners... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Done. "))
|
||||
sys.stdout.flush()
|
||||
print()
|
||||
|
||||
|
||||
# Download fanart
|
||||
fanart_page = requests.get("{}/artwork/fanart".format(series.url))
|
||||
fanart_soup = BeautifulSoup(fanart_page.content, "html.parser")
|
||||
|
||||
# Make a progress bar for the fanart
|
||||
amount = 0
|
||||
for image in fanart_soup.find_all("img", attrs={"class": "media-object img-responsive"}):
|
||||
amount += 1
|
||||
|
||||
progress_bar = ProgressBar(amount)
|
||||
|
||||
|
||||
counter = 0
|
||||
for image in fanart_soup.find_all("img", attrs={"class":"media-object img-responsive"}):
|
||||
sys.stdout.write("\rDownloading Fanart... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "{:03d}.jpg".format(counter)))
|
||||
sys.stdout.flush()
|
||||
image_res = requests.get(image["src"])
|
||||
with open(os.path.join(fanart_folder_path, "{:03d}.jpg".format(counter)), 'wb') as f:
|
||||
f.write(image_res.content)
|
||||
counter+=1
|
||||
progress_bar.increment()
|
||||
|
||||
sys.stdout.write("\rDownloading Fanart... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Done. "))
|
||||
sys.stdout.flush()
|
||||
print()
|
||||
|
||||
# Download posters
|
||||
posters_page = requests.get("{}/artwork/poster".format(series.url))
|
||||
posters_soup = BeautifulSoup(posters_page.content, "html.parser")
|
||||
|
||||
# Make a progress bar for the posters
|
||||
amount = 0
|
||||
for image in posters_soup.find_all("img", attrs={"class": "media-object img-responsive"}):
|
||||
amount += 1
|
||||
|
||||
progress_bar = ProgressBar(amount)
|
||||
|
||||
counter = 0
|
||||
for image in posters_soup.find_all("img", attrs={"class":"media-object img-responsive"}):
|
||||
sys.stdout.write("\rDownloading Posters... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "{:03d}.jpg".format(counter)))
|
||||
sys.stdout.flush()
|
||||
image_res = requests.get(image["src"])
|
||||
with open(os.path.join(posters_folder_path, "{:03d}.jpg".format(counter)), 'wb') as f:
|
||||
f.write(image_res.content)
|
||||
counter+=1
|
||||
progress_bar.increment()
|
||||
|
||||
sys.stdout.write("\rDownloading Posters... {} {}% {}".format(progress_bar.to_string(), progress_bar.get_percent(), "Done. "))
|
||||
sys.stdout.flush()
|
||||
print()
|
||||
|
||||
def installReqs():
|
||||
if is_pip_installed() == True:
|
||||
|
||||
12
search.py
12
search.py
@ -9,14 +9,15 @@ import urllib.parse
|
||||
|
||||
from utils import APIConnector
|
||||
from utils import clear_downloads
|
||||
from utils import clearScreen
|
||||
from utils import clear_screen
|
||||
from utils import create_file_name
|
||||
from authentication import check_timestamp
|
||||
from authentication import checkStatus
|
||||
from authentication import refreshToken
|
||||
|
||||
class Series:
|
||||
def __init__(self, folder_name, id, url):
|
||||
def __init__(self, name, folder_name, id, url):
|
||||
self.name = name
|
||||
self.folder_name = folder_name
|
||||
self.id = str(id)
|
||||
self.url = url
|
||||
@ -33,7 +34,8 @@ def search():
|
||||
else:
|
||||
save_time = dateutil.parser.parse(login["TIMESTAMP"])
|
||||
cur_time = datetime.datetime.now().replace(tzinfo=None) # TODO use UTC time?
|
||||
if check_timestamp(save_time, cur_time) == False:
|
||||
if check_timestamp(save_time, cur_time) == False: # Token is expired
|
||||
print("Your token has expired. Attempting to get a new one...")
|
||||
refreshToken()
|
||||
except Exception as ex:
|
||||
# print(ex)
|
||||
@ -72,7 +74,7 @@ def search():
|
||||
|
||||
title = -1
|
||||
print()
|
||||
clearScreen()
|
||||
clear_screen()
|
||||
while title < 0 or title > len(search_results["data"]) - 1: # Looping until the user chooses
|
||||
print("Results:") # a series from the printed list
|
||||
count = 1 # or they input '0' to cancel
|
||||
@ -97,5 +99,5 @@ def search():
|
||||
|
||||
print()
|
||||
|
||||
series = Series(create_file_name(search_results["data"][title]["seriesName"]), search_results["data"][title]["id"], "https://www.thetvdb.com/series/" + search_results["data"][title]["slug"])
|
||||
series = Series(search_results["data"][title]["seriesName"], create_file_name(search_results["data"][title]["seriesName"]), search_results["data"][title]["id"], "https://www.thetvdb.com/series/" + search_results["data"][title]["slug"])
|
||||
return series
|
||||
|
||||
67
utils.py
67
utils.py
@ -5,7 +5,7 @@ import requests
|
||||
import shutil
|
||||
|
||||
|
||||
|
||||
# Handles all communication to the API
|
||||
class APIConnector:
|
||||
def __init__(self):
|
||||
with open("login.json", "r") as f:
|
||||
@ -28,7 +28,49 @@ class APIConnector:
|
||||
def send_http_req(self, api_path):
|
||||
return requests.get(api_path, headers=self.auth_headers)
|
||||
|
||||
# recursively counts the number of folders and files in the download folder
|
||||
# Simple progress bar for long file download operations
|
||||
class ProgressBar:
    """Fixed-width text progress bar for long file download operations.

    The bar renders as ``[=====               ]``: 20 cells between the
    brackets, filled in proportion to ``counter / size``.

    Attributes:
        size: Total number of steps the bar represents.
        counter: Number of steps completed so far (0 <= counter <= size).
        bar_array: The 21 pre-rendered bar strings, indexed by the number
            of filled cells (0 through 20).
    """

    # Number of cells drawn between the brackets.
    _WIDTH = 20

    def __init__(self, size, counter=None):
        """Create a bar covering ``size`` steps.

        Args:
            size: Total number of steps; converted with ``int()``.
            counter: Steps already completed; converted with ``int()``.
                Defaults to 0 when omitted.

        Raises:
            IndexError: If the starting counter is negative or larger than
                ``size`` (which also rejects a negative ``size``).
        """
        self.size = int(size)

        # Pre-render every possible state once so to_string() is a lookup.
        self.bar_array = [
            "[" + "=" * filled + " " * (self._WIDTH - filled) + "]"
            for filled in range(self._WIDTH + 1)
        ]

        self.counter = 0 if counter is None else int(counter)
        if self.counter < 0 or self.counter > self.size:
            raise IndexError("ProgressBar counter out of bounds.")

    def increment(self):
        """Advance the bar by one step.

        Raises:
            IndexError: If the bar is already complete. The counter is left
                unchanged in that case so the bar can still be rendered.
        """
        # Check before mutating so a failed call never leaves the counter
        # past ``size`` (which would make to_string() index out of range).
        if self.counter >= self.size:
            raise IndexError("ProgressBar counter out of bounds.")
        self.counter += 1

    def get_percent(self):
        """Return the completed share as a whole percentage (0-100).

        A bar with ``size == 0`` has nothing left to do and reports 100.
        """
        if self.size == 0:
            return 100
        # Integer arithmetic avoids float truncation such as
        # int((29 / 100) * 100) == 28.
        return self.counter * 100 // self.size

    def to_string(self):
        """Return the rendered bar string for the current progress.

        A bar with ``size == 0`` is rendered completely filled.
        """
        if self.size == 0:
            return self.bar_array[self._WIDTH]
        return self.bar_array[self.counter * self._WIDTH // self.size]

    def print(self):
        """Print the rendered bar to standard output."""
        print(self.to_string())
|
||||
|
||||
|
||||
|
||||
# Recursively counts the number of folders and files in the download folder.
|
||||
# It's used for displaying stats on how much is in the "downloads" folder
|
||||
stats = [-1, 0] # [folders, files] Start at -1 to ignore the "downloads" folder itself
|
||||
def stat_downloads(path):
|
||||
if os.path.isfile(path):
|
||||
@ -72,27 +114,8 @@ def clear_downloads():
|
||||
print("Deleted {} series, {} folders, and {} files totaling {}".format(series_count, counts[0], counts[1], total_size_str))
|
||||
else:
|
||||
print("There isn't anything to delete.")
|
||||
|
||||
# folders = ["banner", "fanart", "poster"]
|
||||
# del_count = 0
|
||||
# for folder in folders:
|
||||
# if os.path.exists(folder):
|
||||
# image_list = os.listdir(folder)
|
||||
# if len(image_list) != 0:
|
||||
# print("Clearing " + folder + "/")
|
||||
# for x in image_list: # TODO check if folder is empty
|
||||
# print("Deleting {}/{}".format(folder, x))
|
||||
# del_path = os.path.join(folder + "\\" + x)
|
||||
# os.remove(del_path)
|
||||
# del_count += 1
|
||||
# print()
|
||||
# else:
|
||||
# print("'{}' is already empty".format(folder))
|
||||
# else:
|
||||
# os.makedirs(folder)
|
||||
# print("Deleted {} images.\n".format(del_count))
|
||||
|
||||
def clearScreen():
|
||||
def clear_screen():
|
||||
IS_WINDOWS = os.name == "nt"
|
||||
if IS_WINDOWS:
|
||||
os.system("cls")
|
||||
|
||||
Loading…
Reference in New Issue
Block a user