1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#!/usr/bin/env python3
"""Automate the download of the available manuals.
This script automates the download of the manual for every available language
from the specified server (see the bottom of the script), as long as the
server follows this tree:
server_path/language/manual.zip
To use it, modify the server URL at the bottom of the script
(variable "base_url"),
and modify the last line of the script to indicate which kind of manual it is
("installer" or "mcc").
"""
from urllib.request import urlopen, urlretrieve
from urllib.error import HTTPError
from html.parser import HTMLParser
# Leading part of the archive filename, keyed by the kind of manual.
manual_name_prefix = {
    "installer": "DrakX-",
    "mcc": "MCC-",
}
# Trailing part of the archive filename, common to every kind of manual.
manual_name_suffix = "WebHelp-zipped.zip"
class ArchivePageHTMLParser(HTMLParser):
    """Collect directory names from an HTML directory-listing page.

    Every text node that ends with ``/`` is treated as a directory entry
    and stored, without its trailing slash, in ``self.data``.
    """

    def __init__(self):
        super().__init__()
        # Not read anywhere in this class; kept for backward compatibility.
        self.recording = 0
        # Directory names found so far, in document order.
        self.data = []

    def handle_data(self, data):
        """Record *data* as a directory name when it ends with a slash.

        A bare ``/`` and the ``./`` / ``../`` links are skipped: stripping
        the slash leaves nothing that could name a language directory.
        """
        if not data.endswith('/'):
            return
        name = data[:-1]
        if name not in ('', '.', '..'):
            self.data.append(name)
def getPage(url):
    """Return the directory names listed on the page at *url*.

    The page is downloaded and fed, one stripped line at a time, to an
    ``ArchivePageHTMLParser``; the list of names it collected is returned.
    """
    parser = ArchivePageHTMLParser()
    # The context manager closes the connection even if parsing fails.
    with urlopen(url) as response:
        for line in response:
            # Undecodable bytes are replaced rather than aborting the run:
            # only the slash-terminated directory names matter here.
            parser.feed(line.strip().decode('utf-8', errors='replace'))
    # Flush any text the parser is still buffering after the last line.
    parser.close()
    return parser.data
def getZip(url, manual, lang):
    """Download the zipped manual of kind *manual* for language *lang*.

    The archive is requested from ``<url>/<lang>/<filename>`` and written to
    ``<filename>`` (a relative path). The language, filename and URL are
    printed first; an ``HTTPError`` is printed and otherwise ignored.

    Args:
        url: Base URL of the directory that holds the language folders.
        manual: A key of ``manual_name_prefix`` ("installer" or "mcc").
        lang: Name of the language directory on the server.

    Raises:
        KeyError: If *manual* is not a key of ``manual_name_prefix``.
    """
    prefix = manual_name_prefix[manual]
    if lang == 'en' and manual == "mcc":
        # This one combination is built without a language tag.
        filename = prefix + manual_name_suffix
    else:
        filename = prefix + lang.upper() + '-' + manual_name_suffix
    # Drop trailing slashes so a base URL such as ".../installer/" does not
    # yield a double slash in the joined URL.
    language_url = "/".join([url.rstrip('/'), lang, filename])
    print("\n%s" % lang)
    print(filename)
    print(language_url)
    try:
        urlretrieve(language_url, filename)
    except HTTPError as e:
        # Best effort: report the failure and let the caller carry on.
        print(e)
if __name__ == "__main__":
    # Server directory whose listing contains one sub-directory per language.
    base_url = "http://waesvanm.home.xs4all.nl/Mageia_4_documentation/zipped/installer/"
    # Fetch the archive of every language found in the listing, in order.
    for language in getPage(base_url):
        getZip(base_url, "installer", language)
|