#!/usr/bin/env python3
"""
Automate the download of the available manuals.

This script automates the download of the manuals for every available
language from the given server, as long as the server follows this tree:
server_path/language/manual.zip

Run it with the kind of manual ("installer", "MCC", "draklive" or
"netinstall") as first argument and, optionally, the server URL as second
argument (http://docteam.mageia.nl/zipped/ is used by default).

.. warning:: IT SHOULD NOT BE RUN IN THE GIT CLONE!!!
   To use the update script, you need a clean git clone, so you must not
   pollute it with the downloaded manuals.

drakx-installer-help currently doesn't follow the above path rules, so you
need to do:

wget --directory-prefix=./drakx-installer-help/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/drakx-installer-help/

Similarly you can use:

wget --directory-prefix=./MCC/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/MCC/
wget --directory-prefix=./netinstall/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/netinstall/
wget --directory-prefix=./installer/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/installer/
wget --directory-prefix=./draklive/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/draklive/
"""
from urllib.request import urlopen, urlretrieve
from urllib.error import HTTPError
from html.parser import HTMLParser

# File name pattern for each kind of manual, e.g. DrakX-FR-WebHelp-zipped.zip
manual_name_prefix = {"installer": "DrakX-",
                      "MCC": "MCC-",
                      "draklive": "DrakLive-",
                      "netinstall": "NetInstall-"}
manual_name_suffix = "WebHelp-zipped.zip"


def usage():
    print("python3 autodownload.py <manual> [<server URL>]")
    print("\t<manual>: MCC, installer, draklive or netinstall")
    print("\t<server URL>: (optional) if not given: "
          "http://docteam.mageia.nl/zipped/ will be used")


class ArchivePageHTMLParser(HTMLParser):
    """Collect the language subdirectories listed on the server index page."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.data = []

    def handle_data(self, data):
        # Directory links end with a slash; keep the name without it.
        if data.endswith('/'):
            self.data.append(data[:-1])


def getPage(url):
    """Return the list of language directories found at url."""
    parseur = ArchivePageHTMLParser()
    for line in urlopen(url).readlines():
        parseur.feed(line.strip().decode('ascii'))
    return parseur.data


def getZip(url, manual, lang):
    """Download the zipped manual for one language into the current directory."""
    if lang == 'en' and manual == "MCC":
        # The English MCC archive carries no language tag in its name.
        filename = manual_name_prefix[manual] + manual_name_suffix
    else:
        filename = (manual_name_prefix[manual] + lang.upper() + '-' +
                    manual_name_suffix)
    language_url = "/".join([url, lang, filename])
    print("\n%s" % lang)
    print(filename)
    print(language_url)
    try:
        urlretrieve(language_url, filename)
    except HTTPError as e:
        print(e)


if __name__ == "__main__":
    import sys
    if len(sys.argv) not in (2, 3):
        usage()
    else:
        # Check that the requested manual is a known one.
        manual = sys.argv[1]
        if manual not in manual_name_prefix:
            usage()
        else:
            if len(sys.argv) == 3:
                base_url = sys.argv[2]
            else:
                base_url = "http://docteam.mageia.nl/zipped"
            base_url = base_url + "/" + manual
            print(base_url)
            # Fetch one zip per language directory found on the server.
            language_list = getPage(base_url)
            for elem in language_list:
                getZip(base_url, manual, elem)
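
# A minimal usage sketch (assuming the default docteam.mageia.nl server is
# reachable and that you run from a scratch directory, not the git clone):
#
#   python3 autodownload.py MCC
#   python3 autodownload.py installer http://docteam.mageia.nl/zipped
#
# Each run lists the language directories on the server and downloads one
# zipped manual per language into the current directory.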