1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
#!/usr/bin/env python3
""" Automate the download of available manuals.
This script automates the download of the manual for every available language
from the specified server (see the bottom of the script), as long as the
server follows this tree:
server_path/language/manual.zip
To use it, either pass the base URL as an argument or modify the default server URL at the bottom of the script
(variable "base_url"),
and pass the manual name to the script to tell which kind of manual it is
("installer", "draklive", "netinstall" or "mcc")
.. warning:: IT SHOULD NOT BE RUN IN THE GIT CLONE!!!
To use the update script, you need a clean git clone, so you must not
pollute it with the downloaded manuals
drakx-installer-help currently doesn't follow the above path rules so you need to do:
wget --directory-prefix=./drakx-installer-help/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/drakx-installer-help/
similarly you can use
wget --directory-prefix=./MCC/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/MCC/
wget --directory-prefix=./netinstall/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/netinstall/
wget --directory-prefix=./installer/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/installer/
wget --directory-prefix=./draklive/ --timestamping --no-host-directories --recursive --no-directories --no-parent --accept zip http://docteam.mageia.nl/zipped/draklive/
"""
from urllib.request import urlopen, Request
from urllib.error import HTTPError
from html.parser import HTMLParser
from shutil import copyfileobj
# Archive-name prefix for each manual, keyed by the manual identifier that is
# given as the first command-line argument.
manual_name_prefix = dict(
    installer="DrakX-",
    mcc="MCC-",
    draklive="DrakLive-",
    netinstall="NetInstall-",
)

# Tail shared by every archive file name.
manual_name_suffix = "WebHelp-zipped.zip"

# The standard urllib User-Agent gets rejected, so a browser-like one is sent
# with every request instead.
agent_header = {
    'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30',
}
def usage():
    """Print the command-line synopsis of the script to standard output.

    The manual names listed are the exact, case-sensitive identifiers the
    script accepts as its first argument.
    """
    print("python3 autodownload.py <manual> [<server_url>]")
    # The argument is compared case-sensitively with lowercase keys, so the
    # help must advertise "mcc": typing "MCC" would be rejected.
    print("\t <manual>: mcc, installer, netinstall or draklive")
    print("\t <server_url>: (optional) if not given: " +
          "http://docteam.mageia.org.uk/zipped/ will be used")
class ArchivePageHTMLParser(HTMLParser):
    """HTML parser that collects the text nodes ending with a slash.

    Every text node whose content ends with ``/`` is stored in ``data``
    with that final slash removed; all other text is ignored.
    """

    def __init__(self):
        super().__init__()
        # Not read anywhere in this class; kept so the attribute still exists.
        self.recording = 0
        # Collected entries, in the order they were encountered.
        self.data = []

    def handle_data(self, data):
        """Store *data* without its trailing slash, if it ends with one."""
        if not data.endswith('/'):
            return
        self.data.append(data[:-1])
def getPage(url):
    """Fetch the page at *url* and return the entries its parser collected.

    The URL is printed, then the page is requested with ``agent_header`` and
    fed line by line (each line stripped of surrounding whitespace) to an
    ``ArchivePageHTMLParser``.

    :param url: address of the page to download
    :return: the ``data`` list filled by the parser
    :raises: nothing is caught here, so any error raised by ``urlopen``
        propagates to the caller
    """
    print(url)
    parser = ArchivePageHTMLParser()
    # The context manager closes the HTTP response even if parsing fails,
    # instead of leaving the connection open.
    with urlopen(Request(url, headers=agent_header)) as response:
        for raw_line in response:
            # Tolerant decoding: a stray non-ASCII byte anywhere in the page
            # must not abort the listing. ASCII content decodes identically.
            parser.feed(raw_line.strip().decode('utf-8', errors='replace'))
    return parser.data
def getZip(url, manual, lang):
    """Download the zipped manual of one language into the working directory.

    The archive name is the manual's prefix, then the upper-cased language
    code and a dash, then the common suffix. The English "mcc" manual is the
    exception: its name carries no language code. The file is fetched from
    ``url/lang/<archive name>`` and saved under the archive name.

    :param url: base URL of the manual on the server
    :param manual: manual identifier, a key of ``manual_name_prefix``
    :param lang: language code, as it appears in the server tree
    :return: None; an ``HTTPError`` is printed rather than raised
    """
    prefix = manual_name_prefix[manual]
    # The English MCC archive is the only one without a language tag.
    lang_tag = "" if (manual == "mcc" and lang == 'en') else lang.upper() + '-'
    filename = prefix + lang_tag + manual_name_suffix

    language_url = "/".join((url, lang, filename))
    print("Get %s\n" % language_url)

    request = Request(language_url, headers=agent_header)
    try:
        # The destination file is only opened once the server has answered.
        with urlopen(request) as response, open(filename, "wb") as dest_file:
            copyfileobj(response, dest_file)
    except HTTPError as e:
        print(e)
if __name__ == "__main__":
    import sys

    # Script entry point: expects <manual> and, optionally, <server_url>.
    # A wrong argument count or an unknown manual name only prints the help.
    if len(sys.argv) not in (2, 3) or sys.argv[1] not in manual_name_prefix:
        usage()
    else:
        manual = sys.argv[1]
        if len(sys.argv) == 3:
            base_url = sys.argv[2]
        else:
            base_url = "http://docteam.mageia.org.uk/zipped"
        # Drop trailing slashes so that a server root written as ".../zipped/"
        # (the form printed by usage()) does not produce ".../zipped//<manual>".
        base_url = base_url.rstrip("/") + "/" + manual
        # Each entry listed on the manual's page is treated as a language
        # directory and its archive is downloaded in turn.
        language_list = getPage(base_url)
        for elem in language_list:
            getZip(base_url, manual, elem)
|