path: root/MgaRepo/binrepo.py
from MgaRepo import Error, config, mirror, layout
from MgaRepo.util import execcmd, rellink, get_helper
from MgaRepo.svn import SVN

from tqdm import tqdm
import sys
import os
import string
import stat
import shutil
import re
import tempfile
import hashlib
import urllib.parse
import httplib2
import subprocess

from io import StringIO

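# Per-package list, kept under SOURCES/, mapping binary source files to their
# sha1 checksums.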
SOURCES_FILE = "sha1.lst"

class ChecksumError(Error):
    pass

def is_binary(path):
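    # A file is treated as binary if its name matches the upload-match
    # pattern, if it is at least upload-match-size bytes, or if its first
    # 64 KiB contains a NUL byte.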
    raw = config.get("binrepo", "upload-match",
            r"\.(7z|Z|bin|bz2|cpio|db|deb|egg|gem|gz|jar|jisp|lzma|"
            r"pdf|pgn\.gz|pk3|png|rpm|run|sdz|smzip|tar|tbz|"
            r"tbz2|tgz|ttf|uqm|wad|war|xar|xpi|xz|zip|wav|mp3|ogg|"
            r"jpg|gif|avi|mpg|mpeg|rar)$")
    maxsize = config.getint("binrepo", "upload-match-size", "1048576") # 1MiB
    expr = re.compile(raw)
    name = os.path.basename(path)
    if expr.search(name):
        return True
    st = os.stat(path)
    if st[stat.ST_SIZE] >= maxsize:
        return True
    with open(path, 'rb') as fd:
        if b'\0' in fd.read(0x10000):
            return True
    return False

def find_binaries(paths):
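    # Directories are scanned one level deep (not recursively); plain files
    # are tested directly.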
    new = []
    for path in paths:
        if os.path.isdir(path):
            for name in os.listdir(path):
                fpath = os.path.join(path, name)
                if is_binary(fpath):
                    new.append(fpath)
        else:
            if is_binary(path):
                new.append(path)
    return new

def download_binary(topdir, sha1, filename):
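    # The download command is a configurable template; $dest and $url are
    # substituted below.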
    fmt = config.get("global", "download-command",
            "wget -c -O '$dest' $url")
    url = config.get("binrepo", "download_url",
            "https://binrepo.mageia.org/")
    url = mirror.normalize_path(url + "/" + sha1)
    dest = os.path.join(topdir, 'SOURCES', filename)
    if os.path.exists(dest):
        if file_hash(dest) == sha1:
            return 1
        else:
            raise Error("File with incorrect sha1sum: %s" % dest)
    context = {"dest": dest, "url": url}
    try:
        cmd = string.Template(fmt).substitute(context)
    except KeyError as e:
        raise Error("invalid variable %r in download-command "
                "configuration option" % e)
    try:
        status, output = execcmd(cmd, show=True)
    except Error:
        # Remove any partial download before reporting the failure; the file
        # may not exist at all if the command could not start.
        if os.path.exists(dest):
            os.unlink(dest)
        raise Error("Could not download file %s\n" % url)

def download_binaries(topdir):
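    # Fetch every file listed in sha1.lst into the package's SOURCES/ directory.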
    spath = sources_path(topdir)
    if not os.path.exists(spath):
        raise Error("'%s' was not found" % spath)
    entries = parse_sources(spath)
    for name, sha1 in entries.items():
        download_binary(topdir, sha1, name)

def binary_exists(sha1sum):
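    # A HEAD request against the download URL tells whether the blob is
    # already in the binary repository.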
    dlurl = config.get("binrepo", "download_url",
            "https://binrepo.mageia.org/")
    dlurl = mirror.normalize_path(dlurl + "/" + sha1sum)
    h = httplib2.Http()
    resp, content = h.request(dlurl, 'HEAD')
    return resp.status == 200

def upload_binary(topdir, filename):
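    # Upload a SOURCES/ file by streaming it over ssh to the upload helper;
    # the transfer is skipped if a blob with the same sha1 already exists.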
    filepath = os.path.join(topdir, 'SOURCES', filename)
    if not os.path.exists(filepath):
        raise Error("'%s' was not found" % filepath)
    sha1sum = file_hash(filepath)
    if binary_exists(sha1sum):
        return
    host = config.get("binrepo", "upload_host")
    # upload_bin_helper by default: /usr/local/bin/wrapper.upload-bin
    upload_bin_helper = get_helper("upload-bin")
    command = ["ssh", host, upload_bin_helper, filename]
    try:
        b = os.path.getsize(filepath)
    except OSError:
        # os.path.getsize raises OSError, not MgaRepo's Error, on failure.
        raise Error("Could not open file %s\n" % filepath)
    with open(filepath, "rb") as f:
        # file is sent chunk by chunk to allow displaying of a progress bar
        bs = 4096 * 128
        written = 0
        ncuts = b // bs
        pbar = tqdm(total=b, unit='B', unit_scale=True, unit_divisor=1024, desc=filename)
        # Halve the chunk size until the file splits into more than 100 chunks,
        # so the progress bar updates smoothly; stop before the chunk size
        # reaches zero on small files.
        while ncuts <= 100 and bs > 1:
            bs //= 2
            ncuts = b // bs
        p = subprocess.Popen(command, stdin=subprocess.PIPE)
        for i in range(ncuts + 1):
            buf = f.read(bs)
            p.stdin.write(buf)
            written += len(buf)
            pbar.update(len(buf))
            p.stdin.flush()
        pbar.close()
        p.communicate()

def import_binaries(topdir, pkgname):
    """Import all binaries from a given package checkout

    @topdir: the path to the svn checkout
    """
    sourcesdir = os.path.join(topdir, "SOURCES")
    binaries = find_binaries([sourcesdir])
    for path in binaries:
        upload_binary(topdir, os.path.basename(path))
    update_sources(topdir, added=binaries)
    svn = SVN()
    svn.add(sources_path(topdir))

def parse_sources(path):
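    # Each line uses the sha1sum output layout: "<sha1>  <filename>".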
    entries = {}
    try:
        f = open(path, encoding="utf-8")
    except IOError:
        # Callers expect a name -> sha1 mapping, so return an empty dict
        # rather than a list when the file is missing.
        return entries
    with f:
        for rawline in f:
            line = rawline.strip()
            try:
                sha1, name = line.split(None, 1)
            except ValueError:
                # failed to unpack, line format error
                raise Error("invalid line in sources file: %s" % rawline)
            entries[name] = sha1
    return entries

def file_hash(path):
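    # Hash the file in 4 KiB blocks so large tarballs are not read into
    # memory at once.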
    digest = hashlib.sha1()
    with open(path, 'rb') as f:
        while True:
            block = f.read(4096)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

def sources_path(topdir):
    path = os.path.join(topdir, "SOURCES", SOURCES_FILE)
    return path

def update_sources(topdir, added=[], removed=[]):
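    # Rewrite sha1.lst: drop the removed names, hash the newly added files,
    # and write the entries sorted by file name.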
    path = sources_path(topdir)
    entries = {}
    if os.path.isfile(path):
        entries = parse_sources(path)
    # Open before calculating hashes; parse_sources reads the file as UTF-8,
    # so write it back the same way.
    with open(path, "w", encoding="utf-8") as f:
        for name in removed:
            if name in entries:
                del entries[name]
        for added_path in added:
            name = os.path.basename(added_path)
            entries[name] = file_hash(added_path)
        for name in sorted(entries):
            f.write("%s  %s\n" % (entries[name], name))

def check_sources(topdir):
    """Verify hashes against binrepo files

    Returns a list of files that differ. Files that do not exist locally are
    ignored.
    """
    changed = []
    path = sources_path(topdir)
    if os.path.isfile(path):
        entries = parse_sources(path)
        for filename in entries:
            filepath = os.path.join(topdir, 'SOURCES', filename)
            if os.path.exists(filepath):
                name = os.path.basename(filepath)
                if entries[name] != file_hash(filepath):
                    changed.append(name)
    return changed