#!/usr/bin/python3.7
import math
import os
import re
import time

import requests
from bs4 import BeautifulSoup as bs
from tqdm import tqdm


# Slow, polite bulk downloader for one genre listing on modarchive.org.
#
# The script walks the paginated search results, downloads every module
# linked with a "Download" anchor into ``mods/``, and records the last fully
# processed page in the ``last_page`` file so an interrupted run can resume.

# Search-result URL; ``{}`` is replaced with the page number.
base_url = 'https://modarchive.org/index.php?query=54&request=search&search_type=genre&page={}'
headers = {'User-Agent' : 'I want music, im sorry :c, I will do it slowly'}

# Size (in bytes) of each chunk pulled from the streamed response.
block_size = 1024
# Seconds to wait on a silent server before giving up instead of hanging.
request_timeout = 60


def _pause(seconds: int) -> None:
    """Sleep for ``seconds`` seconds, one at a time, behind a tqdm countdown."""
    for _ in tqdm(range(seconds), unit = 's', unit_scale = True):
        time.sleep(1)


def _filename_from(content_disposition: str) -> str:
    """Extract the download's file name from a Content-Disposition header.

    The header is server-controlled, so any directory components are dropped
    to keep the download inside ``mods/``.

    Raises:
        ValueError: if the header carries no usable ``filename=`` value.
    """
    match = re.search(r"filename=(.+)", content_disposition)
    name = os.path.basename(match.group(1)) if match else ''
    if name in ('', '.', '..'):
        raise ValueError(
            'No usable filename in header: {!r}'.format(content_disposition))
    return name


# Resume point: the number of the last page that was completely processed.
try:
    with open("last_page", "r") as f:
        last_page = int(f.read())
except FileNotFoundError:
    # First run: nothing processed yet, so the loop below starts at page 1.
    last_page = 0

os.makedirs('mods', exist_ok=True)

for page_index in range(last_page + 1, 75):
    url = base_url.format(page_index)
    print('Requesting url {}'.format(url))

    request = requests.get(url, headers = headers, timeout = request_timeout)
    # Fail loudly on an error page; otherwise it would parse as "0 links" and
    # the page would be recorded as done without downloading anything.
    request.raise_for_status()
    page = request.content

    soup = bs(page, 'html.parser')
    links = soup.find_all('a', title = 'Download')
    print('Found {} links to download'.format(len(links)))

    for i, link in enumerate(links, start=1):
        # The context manager releases the streamed connection on every exit
        # path, including the "already exists" skip below.
        with requests.get(link['href'], stream=True, headers = headers,
                          timeout = request_timeout) as remote_file:
            remote_file.raise_for_status()
            content_disposition = remote_file.headers['content-disposition']
            filename = _filename_from(content_disposition)
            target = 'mods/' + filename

            if os.path.isfile(target):
                print('{} already exists, skiping'.format(filename))
                continue

            # content-length is optional (e.g. chunked responses); without it
            # tqdm simply shows a counter instead of a bar.
            content_length = remote_file.headers.get('content-length')
            if content_length is None:
                total_blocks = None
            else:
                # True division, so a trailing partial block is counted.
                total_blocks = math.ceil(int(content_length) / block_size)

            # Download under a temporary name and rename when complete, so an
            # interrupted run never leaves a truncated file that a later run
            # would skip as "already exists".
            partial = target + '.part'
            with open(partial, 'wb') as handle:
                for data in tqdm(remote_file.iter_content(block_size),
                                 desc='{} -> {}'.format(i, link['href']),
                                 total = total_blocks, unit = 'KB',
                                 unit_scale = True, miniters = 0,
                                 mininterval = 0):
                    handle.write(data)
            os.replace(partial, target)

        print('Waiting 10 seconds between downloads')
        _pause(10)

    print('Waiting 60 seconds between pages')
    _pause(60)

    # Persist progress only after the whole page has been handled.
    with open("last_page", "w") as f:
        f.write(str(page_index))