Avoid rejection by the server by sending a known User-Agent header

author: Papoteur <papoteur@mageia.org> 2019-07-03 14:57:20 +0200
committer: Papoteur <papoteur@mageia.org> 2019-07-03 14:57:20 +0200
commit: 6bbc91a0fbde8f030c4513104eeb4513a0be2afe (patch)
tree: 02230b625ee99cef1ac83f0cbc9a4bfa7f26823f
parent: 56974731cdc0174bc36302684a34ce66110853b7 (diff)
download: doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.gz
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.bz2
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.xz
doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.zip
1 file changed, 18 insertions, 18 deletions
diff --git a/autodownload.py b/autodownload.py
index a42ab6e8..234131d2 100644
--- a/autodownload.py
+++ b/autodownload.py
@@ -6,10 +6,10 @@
     server follows this tree:
         server_path/language/manual.zip
 
-    To use it, you have to modify the server URL at the bottom of the script
+    To use it, you have to either pass the base URL or modify the default server URL at the bottom of the script
     (variable "base_url"),
-    and modify the last line of the script to tell which kind of manual it is
-    ("installer" or "mcc")
+    and pass the name to the script to tell which kind of manual it is
+    ("installer", "draklive", "netinstall" or "mcc")
 
     .. warning:: IT SHOULD NOT BE RUN IN THE GIT CLONE!!!
 
@@ -26,23 +26,24 @@
 
 
 """
-from urllib.request import urlopen, urlretrieve
+from urllib.request import urlopen, Request
 from urllib.error import HTTPError
 from html.parser import HTMLParser
-
+from shutil import copyfileobj
 
 manual_name_prefix = {"installer": "DrakX-",
-                      "MCC": "MCC-",
+                      "mcc": "MCC-",
                       "draklive": "DrakLive-",
                       "netinstall": "NetInstall-"}
 manual_name_suffix = "WebHelp-zipped.zip"
-
+# Send a browser-like User-Agent, because the server rejects urllib's default one
+agent_header = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.1 Safari/603.1.30'}
 
 def usage():
     print("python3 autodownload.py <manual> [<server_url>]")
-    print("\t <manual>: MCC, installer or draklive")
+    print("\t <manual>: MCC, installer, netinstall or draklive")
     print("\t <server_url>: (optional) if not given: " +
-          "http://docteam.mageia.nl/zipped/ will be used")
+          "http://docteam.mageia.org.uk/zipped/ will be used")
 
 class ArchivePageHTMLParser(HTMLParser):
     def __init__(self):
@@ -55,8 +56,9 @@ class ArchivePageHTMLParser(HTMLParser):
             self.data.append(data[:-1])
 
 def getPage(url):
+    print(url)
     parseur = ArchivePageHTMLParser()
-    for word in urlopen(url).readlines():
+    for word in urlopen(Request(url, headers=agent_header)).readlines():
         parseur.feed(word.strip().decode('ascii'))
     return parseur.data
 
@@ -70,16 +72,15 @@ def getZip(url, manual, lang):
                    manual_name_suffix
 
     language_url = "/".join([url, lang, filename])
-    print("\n%s" % lang)
-    print(filename)
-    print(language_url)
+    print("Get %s\n"%language_url)
     try:
-        urlretrieve(language_url, filename)
+        with urlopen(Request(language_url, headers=agent_header)) as response:
+            with open(filename,"wb") as dest_file:
+                copyfileobj(response, dest_file)
     except HTTPError as e:
         print(e)
 
 
-
 if __name__ == "__main__":
     import sys
     if len(sys.argv) not in (2, 3):
@@ -93,9 +94,8 @@ if __name__ == "__main__":
             if len(sys.argv) == 3:
                 base_url = sys.argv[2]
             else:
-                base_url = "http://docteam.mageia.nl/zipped"
-            base_url = base_url + "/" + manual 
-            print(base_url)
+                base_url = "http://docteam.mageia.org.uk/zipped"
+            base_url = base_url + "/" + manual
             language_list = getPage(base_url)
             for elem in language_list:
                 getZip(base_url, manual, elem)
author	Papoteur <papoteur@mageia.org>	2019-07-03 14:57:20 +0200
committer	Papoteur <papoteur@mageia.org>	2019-07-03 14:57:20 +0200
commit	6bbc91a0fbde8f030c4513104eeb4513a0be2afe (patch)
tree	02230b625ee99cef1ac83f0cbc9a4bfa7f26823f
parent	56974731cdc0174bc36302684a34ce66110853b7 (diff)
download	doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.gz doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.bz2 doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.tar.xz doc-6bbc91a0fbde8f030c4513104eeb4513a0be2afe.zip