diff options
Diffstat (limited to 'server/getfeaturedpictures.py')
-rwxr-xr-x | server/getfeaturedpictures.py | 19 |
1 file changed, 16 insertions, 3 deletions
diff --git a/server/getfeaturedpictures.py b/server/getfeaturedpictures.py
index c15953f..4f553fd 100755
--- a/server/getfeaturedpictures.py
+++ b/server/getfeaturedpictures.py
@@ -2,7 +2,7 @@
 import requests
 import json
 import time
-import urllib.parse
+from urllib.parse import quote
 
 headers = {'Accept-Encoding':'gzip', 'User-Agent': 'contact pommicket+jigsaw @ gmail.com '}
 def make_file_request(cmcontinue):
@@ -19,7 +19,7 @@ def make_file_request(cmcontinue):
 def get_urls_of_images(images):
     while True:
         time.sleep(1)
-        url = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&maxlag=5&prop=imageinfo&iiprop=url&titles=' + urllib.parse.quote('|'.join(images))
+        url = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&maxlag=5&prop=imageinfo&iiprop=url&titles=' + quote('|'.join(images))
         response = requests.get(url,headers=headers)
         if 'X-Database-Lag' in response.headers:
             time.sleep(5)
@@ -51,12 +51,25 @@ def get_featured_files():
 def get_featured_urls():
     with open('featuredpictures_files.txt', 'r') as f:
         files = [line.strip() for line in f]
-    with open('featuredpictures.txt', 'w') as f:
+    with open('featuredpictures_urls.txt', 'w') as f:
         for i in range(0, len(files), 30):
             print('got URLs for',i,'files')
             batch = files[i:min(len(files), i + 30)]
             urls = get_urls(batch)
             f.write(''.join(url + '\n' for url in urls))
+
+def combine():
+    with open('featuredpictures_files.txt', 'r') as f:
+        files = [line.strip() for line in f]
+    with open('featuredpictures_urls.txt', 'r') as f:
+        urls = [line.strip() for line in f]
+    assert all(urls)
+    assert all(files)
+    assert len(files) == len(urls)
+    with open('featuredpictures.txt', 'w') as out:
+        for f, u in zip(files, urls):
+            out.write(f'{u} https://commons.wikimedia.org/wiki/{quote(f)}\n')
 if __name__ == '__main__':
     get_featured_files()
     get_featured_urls()
+    combine()