diff options
author | Papoteur <papoteur@mageia.org> | 2019-07-03 14:57:20 +0200 |
---|---|---|
committer | Papoteur <papoteur@mageia.org> | 2019-07-03 14:57:20 +0200 |
commit | 6bbc91a0fbde8f030c4513104eeb4513a0be2afe (patch) | |
tree | 02230b625ee99cef1ac83f0cbc9a4bfa7f26823f | |
parent | 56974731cdc0174bc36302684a34ce66110853b7 (diff) | |
download | doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.gz doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.bz2 doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.xz doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.zip |
Avoid rejection from the server, adding known User-Agent
-rw-r--r-- | autodownload.py | 36 |
1 file changed, 18 insertions, 18 deletions
diff --git a/autodownload.py b/autodownload.py index a42ab6e8..234131d2 100644 --- a/autodownload.py +++ b/autodownload.py @@ -6,10 +6,10 @@ server follows this tree: server_path/language/manual.zip - To use it, you have to modify the server URL at the bottom of the script + To use it, you have to pass the base url or to modify the default server URL at the bottom of the script (variable "base_url"), - and modify the last line of the script to tell which kind of manual it is - ("installer" or "mcc") + and pass the name to the script to tell which kind of manual it is + ("installer", "draklive", "netinstall" or "mcc") .. warning:: IT SHOULD NOT BE RUN IN THE GIT CLONE!!! @@ -26,23 +26,24 @@ """ -from urllib.request import urlopen, urlretrieve +from urllib.request import urlopen, Request from urllib.error import HTTPError from html.parser import HTMLParser - +from shutil import copyfileobj manual_name_prefix = {"installer": "DrakX-", - "MCC": "MCC-", + "mcc": "MCC-", "draklive": "DrakLive-", "netinstall": "NetInstall-"} manual_name_suffix = "WebHelp-zipped.zip" - +# to avoid rejection of standard User-Agent of urllib +agent_header = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30'} def usage(): print("python3 autodownload.py <manual> [<server_url>]") - print("\t <manual>: MCC, installer or draklive") + print("\t <manual>: MCC, installer, netinstall or draklive") print("\t <server_url>: (optional) if not given: " + - "http://docteam.mageia.nl/zipped/ will be used") + "http://docteam.mageia.org.uk/zipped/ will be used") class ArchivePageHTMLParser(HTMLParser): def __init__(self): @@ -55,8 +56,9 @@ class ArchivePageHTMLParser(HTMLParser): self.data.append(data[:-1]) def getPage(url): + print(url) parseur = ArchivePageHTMLParser() - for word in urlopen(url).readlines(): + for word in urlopen(Request(url, headers=agent_header)).readlines(): parseur.feed(word.strip().decode('ascii')) 
return parseur.data @@ -70,16 +72,15 @@ def getZip(url, manual, lang): manual_name_suffix language_url = "/".join([url, lang, filename]) - print("\n%s" % lang) - print(filename) - print(language_url) + print("Get %s\n"%language_url) try: - urlretrieve(language_url, filename) + with urlopen(Request(language_url, headers=agent_header)) as response: + with open(filename,"wb") as dest_file: + copyfileobj(response, dest_file) except HTTPError as e: print(e) - if __name__ == "__main__": import sys if len(sys.argv) not in (2, 3): @@ -93,9 +94,8 @@ if __name__ == "__main__": if len(sys.argv) == 3: base_url = sys.argv[2] else: - base_url = "http://docteam.mageia.nl/zipped" - base_url = base_url + "/" + manual - print(base_url) + base_url = "http://docteam.mageia.org.uk/zipped" + base_url = base_url + "/" + manual language_list = getPage(base_url) for elem in language_list: getZip(base_url, manual, elem) |