#!/usr/bin/python3.7
"""Slowly download every module listed by one modarchive.org genre search.

The script walks the paginated search results, saves each linked file into
``mods/`` and stores the number of the last fully processed page in the
``last_page`` file, so an interrupted run resumes where it stopped.
"""
import os
import re
import time
import math
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup as bs

base_url = 'https://modarchive.org/index.php?query=54&request=search&search_type=genre&page={}'
headers = {'User-Agent' : 'I want music, im sorry :c, I will do it slowly'}

PROGRESS_FILE = "last_page"   # holds the last page that was fully processed
DOWNLOAD_DIR = 'mods'         # directory the downloaded files are written to
END_PAGE = 75                 # exclusive upper bound of the page range
BLOCK_SIZE = 1024             # bytes requested per streamed chunk
REQUEST_TIMEOUT = 30          # seconds; without it a stalled socket blocks forever


def _read_last_page():
    """Return the page number stored in PROGRESS_FILE, or 0 if it is missing."""
    try:
        with open(PROGRESS_FILE, "r") as f:
            return int(f.read())
    except FileNotFoundError:
        # First run: nothing processed yet, so the loop starts at page 1.
        return 0


def _filename_from_header(content_disposition):
    """Extract a safe, bare file name from a Content-Disposition value.

    Returns None when the header is absent or carries no usable name.
    """
    matches = re.findall("filename=(.+)", content_disposition or '')
    if not matches:
        return None
    # The value comes from the remote server: drop surrounding quotes and any
    # directory components so it can never escape DOWNLOAD_DIR.
    name = os.path.basename(matches[0].strip().strip('"\''))
    if name in ('', '.', '..'):
        return None
    return name


def _wait(seconds):
    """Sleep for ``seconds`` seconds while showing a countdown bar."""
    for _ in tqdm(range(seconds), unit = 's', unit_scale = True):
        time.sleep(1)


def _download(index, href):
    """Stream ``href`` into DOWNLOAD_DIR.

    ``index`` is only used to label the progress bar.
    Returns True when a file was written, False when the link was skipped
    (HTTP error, no file name in the response, or file already present).
    """
    # The context manager releases the connection on every exit path,
    # including the early "skip" returns.
    with requests.get(href, stream=True, headers = headers,
                      timeout = REQUEST_TIMEOUT) as remote_file:
        if not remote_file.ok:
            print('Skipping {}: HTTP status {}'.format(href, remote_file.status_code))
            return False

        filename = _filename_from_header(remote_file.headers.get('content-disposition'))
        if filename is None:
            print('Skipping {}: no usable filename in response'.format(href))
            return False

        target = os.path.join(DOWNLOAD_DIR, filename)
        if os.path.isfile(target):
            print('{} already exists, skiping'.format(filename))
            return False

        # The size header is optional; without it the bar simply has no total.
        content_length = remote_file.headers.get('content-length', '')
        if content_length.isdigit():
            # True division so a trailing partial block is counted.
            total_blocks = math.ceil(int(content_length) / BLOCK_SIZE)
        else:
            total_blocks = None

        # Write to a temporary name and rename at the end, so an interrupted
        # transfer is never mistaken for a finished file on the next run.
        partial = target + '.part'
        with open(partial, 'wb') as handle:
            for data in tqdm(remote_file.iter_content(BLOCK_SIZE),
                             desc='{} -> {}'.format(index, href),
                             total = total_blocks,
                             unit = 'KB', unit_scale = True,
                             miniters = 0, mininterval = 0):
                handle.write(data)
        os.replace(partial, target)
    return True


os.makedirs(DOWNLOAD_DIR, exist_ok=True)
last_page = _read_last_page()

for page_index in range(last_page + 1, END_PAGE):
    url = base_url.format(page_index)
    print('Requesting url {}'.format(url))
    request = requests.get(url, headers = headers, timeout = REQUEST_TIMEOUT)
    # Stop on a failed listing request instead of parsing an error page and
    # recording the page as done.
    request.raise_for_status()
    soup = bs(request.content, 'html.parser')
    links = soup.find_all('a', title = 'Download')
    print('Found {} links to download'.format(len(links)))

    for i, link in enumerate(links, start=1):
        if _download(i, link['href']):
            print('Waiting 10 seconds between downloads')
            _wait(10)

    # Record progress before the pause so an interrupt during the wait does
    # not cause the finished page to be processed again.
    with open(PROGRESS_FILE, "w") as f:
        f.write(str(page_index))

    print('Waiting 60 seconds between pages')
    _wait(60)