aboutsummaryrefslogtreecommitdiffstats
path: root/autodownload.py
diff options
context:
space:
mode:
authorPapoteur <papoteur@mageia.org>2019-07-03 14:57:20 +0200
committerPapoteur <papoteur@mageia.org>2019-07-03 14:57:20 +0200
commit6bbc91a0fbde8f030c4513104eeb4513a0be2afe (patch)
tree02230b625ee99cef1ac83f0cbc9a4bfa7f26823f /autodownload.py
parent56974731cdc0174bc36302684a34ce66110853b7 (diff)
downloaddoc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.gz
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.bz2
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.xz
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.zip
Avoid rejection by the server by sending a known User-Agent
Diffstat (limited to 'autodownload.py')
-rw-r--r--autodownload.py36
1 files changed, 18 insertions, 18 deletions
diff --git a/autodownload.py b/autodownload.py
index a42ab6e8..234131d2 100644
--- a/autodownload.py
+++ b/autodownload.py
@@ -6,10 +6,10 @@
server follows this tree:
server_path/language/manual.zip
- To use it, you have to modify the server URL at the bottom of the script
+ To use it, pass the base URL as an argument or modify the default server URL at the bottom of the script
(variable "base_url"),
- and modify the last line of the script to tell which kind of manual it is
- ("installer" or "mcc")
+ and pass the name to the script to tell which kind of manual it is
+ ("installer", "draklive", "netinstall" or "mcc")
.. warning:: IT SHOULD NOT BE RUN IN THE GIT CLONE!!!
@@ -26,23 +26,24 @@
"""
-from urllib.request import urlopen, urlretrieve
+from urllib.request import urlopen, Request
from urllib.error import HTTPError
from html.parser import HTMLParser
-
+from shutil import copyfileobj
manual_name_prefix = {"installer": "DrakX-",
- "MCC": "MCC-",
+ "mcc": "MCC-",
"draklive": "DrakLive-",
"netinstall": "NetInstall-"}
manual_name_suffix = "WebHelp-zipped.zip"
-
+# Send a browser-like User-Agent: the server rejects urllib's default one
+agent_header = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30'}
def usage():
print("python3 autodownload.py <manual> [<server_url>]")
- print("\t <manual>: MCC, installer or draklive")
+ print("\t <manual>: MCC, installer, netinstall or draklive")
print("\t <server_url>: (optional) if not given: " +
- "http://docteam.mageia.nl/zipped/ will be used")
+ "http://docteam.mageia.org.uk/zipped/ will be used")
class ArchivePageHTMLParser(HTMLParser):
def __init__(self):
@@ -55,8 +56,9 @@ class ArchivePageHTMLParser(HTMLParser):
self.data.append(data[:-1])
def getPage(url):
+ print(url)
parseur = ArchivePageHTMLParser()
- for word in urlopen(url).readlines():
+ for word in urlopen(Request(url, headers=agent_header)).readlines():
parseur.feed(word.strip().decode('ascii'))
return parseur.data
@@ -70,16 +72,15 @@ def getZip(url, manual, lang):
manual_name_suffix
language_url = "/".join([url, lang, filename])
- print("\n%s" % lang)
- print(filename)
- print(language_url)
+ print("Get %s\n"%language_url)
try:
- urlretrieve(language_url, filename)
+ with urlopen(Request(language_url, headers=agent_header)) as response:
+ with open(filename,"wb") as dest_file:
+ copyfileobj(response, dest_file)
except HTTPError as e:
print(e)
-
if __name__ == "__main__":
import sys
if len(sys.argv) not in (2, 3):
@@ -93,9 +94,8 @@ if __name__ == "__main__":
if len(sys.argv) == 3:
base_url = sys.argv[2]
else:
- base_url = "http://docteam.mageia.nl/zipped"
- base_url = base_url + "/" + manual
- print(base_url)
+ base_url = "http://docteam.mageia.org.uk/zipped"
+ base_url = base_url + "/" + manual
language_list = getPage(base_url)
for elem in language_list:
getZip(base_url, manual, elem)