summary | refs | log | tree | commit | diff
path: root/server/getfeaturedpictures.py
diff options
context:
space:
mode:
Diffstat (limited to 'server/getfeaturedpictures.py')
-rwxr-xr-x server/getfeaturedpictures.py 19
1 files changed, 16 insertions, 3 deletions
diff --git a/server/getfeaturedpictures.py b/server/getfeaturedpictures.py
index c15953f..4f553fd 100755
--- a/server/getfeaturedpictures.py
+++ b/server/getfeaturedpictures.py
@@ -2,7 +2,7 @@
import requests
import json
import time
-import urllib.parse
+from urllib.parse import quote
headers = {'Accept-Encoding':'gzip', 'User-Agent': 'contact pommicket+jigsaw @ gmail.com '}
def make_file_request(cmcontinue):
@@ -19,7 +19,7 @@ def make_file_request(cmcontinue):
def get_urls_of_images(images):
while True:
time.sleep(1)
- url = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&maxlag=5&prop=imageinfo&iiprop=url&titles=' + urllib.parse.quote('|'.join(images))
+ url = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&maxlag=5&prop=imageinfo&iiprop=url&titles=' + quote('|'.join(images))
response = requests.get(url,headers=headers)
if 'X-Database-Lag' in response.headers:
time.sleep(5)
@@ -51,12 +51,25 @@ def get_featured_files():
def get_featured_urls():
with open('featuredpictures_files.txt', 'r') as f:
files = [line.strip() for line in f]
- with open('featuredpictures.txt', 'w') as f:
+ with open('featuredpictures_urls.txt', 'w') as f:
for i in range(0, len(files), 30):
print('got URLs for',i,'files')
batch = files[i:min(len(files), i + 30)]
urls = get_urls(batch)
f.write(''.join(url + '\n' for url in urls))
+
+def combine():
+ with open('featuredpictures_files.txt', 'r') as f:
+ files = [line.strip() for line in f]
+ with open('featuredpictures_urls.txt', 'r') as f:
+ urls = [line.strip() for line in f]
+ assert all(urls)
+ assert all(files)
+ assert len(files) == len(urls)
+ with open('featuredpictures.txt', 'w') as out:
+ for f, u in zip(files, urls):
+ out.write(f'{u} https://commons.wikimedia.org/wiki/{quote(f)}\n')
if __name__ == '__main__':
get_featured_files()
get_featured_urls()
+ combine()