aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfilip <filip.komar@gmail.com>2021-03-14 23:29:23 +0100
committerfilip <filip.komar@gmail.com>2021-03-14 23:29:23 +0100
commita7dd5743f97aa92d94424651ad532c65002e1fea (patch)
tree5bdef848dfd8f060ae87d025dc015a0fe1c1959e
parent1768448fc898c2c75ae5cea9880780c4e3799398 (diff)
downloadwww-a7dd5743f97aa92d94424651ad532c65002e1fea.tar
www-a7dd5743f97aa92d94424651ad532c65002e1fea.tar.gz
www-a7dd5743f97aa92d94424651ad532c65002e1fea.tar.bz2
www-a7dd5743f97aa92d94424651ad532c65002e1fea.tar.xz
www-a7dd5743f97aa92d94424651ad532c65002e1fea.zip
more robust parsing of mirror list with improved error testing and reporting
-rw-r--r--lib/Downloads.php87
1 files changed, 48 insertions, 39 deletions
diff --git a/lib/Downloads.php b/lib/Downloads.php
index 4ca48383f..c31381d59 100644
--- a/lib/Downloads.php
+++ b/lib/Downloads.php
@@ -164,52 +164,62 @@ class Downloads
}
$num_of_all_mirrs = count($data);
$num_of_tested_mirrs = 0;
+ $common_patern = '/distrib/8/i586';
+ // this huge regex magic achieved with a lot of help from great https://regex101.com/
+ $single_mirror_parsing_regex = '/\s*continent\s*=\s*(?<continent>\w*)\s*,\s*';
+ $single_mirror_parsing_regex .= '\s*zone\s*=\s*(?<zone>\w*)\s*,\s*';
+ $single_mirror_parsing_regex .= '\s*country\s*=\s*(?<country>\w*)\s*,\s*';
+ $single_mirror_parsing_regex .= '(?:\s*city\s*=\s*(?<city>[\S ,]*)\s*,\s*)?';
+ $single_mirror_parsing_regex .= '\s*latitude\s*=\s*[-]?\d*\.?\d*\s*,\s*';
+ $single_mirror_parsing_regex .= '\s*longitude\s*=\s*[-]?\d*\.?\d*\s*,\s*';
+ $single_mirror_parsing_regex .= '\s*version\s*=\s*\w*\s*,\s*arch\s*=\s*\w*\s*,\s*type\s*=\s*\w*\s*,\s*';
+ $single_mirror_parsing_regex .= '\s*url\s*=\s*(?<url>\S*)\s*/m';
foreach ($data as $line) {
$num_of_tested_mirrs++;
$mirrs_processed = sprintf("%.0f %%", $num_of_tested_mirrs / $num_of_all_mirrs * 100);
- $line = explode(',', trim($line));
- $m = array();
- foreach ($line as $val) {
- $val = explode('=', trim($val));
- if (!empty($val[1])) {
- $m[$val[0]] = $val[1];
- } else {
- // workaround to add second values with unescaped comma in csv line to the previous (valid) value
- end($m);
- $m[key($m)] = $m[key($m)] . ", " . $val[0];
- if ($refresh_country_and_city_arrays == true) {
- // store $line to examine faults
- $faults[] = $line;
- }
- }
+ $regex_error = preg_match_all($single_mirror_parsing_regex, $line, $matches, PREG_SET_ORDER, 0);
+ if (false === $regex_error) {
+ $faults['error: regex parse failed'][] = $line;
+ echo 'Regex parse failed error in line ' . $line;
+ continue;
+ }
+ $m = $matches[0];
+ if ('' == $m['url']) {
+ $faults['error: parse url'][] = $line;
+ echo 'Url parse error in line ' . $line;
+ continue;
+ }
+ if ('' == $m['continent']) {
+ $faults['error: parse continent'][] = $line;
+ echo 'Continent parse error in line ' . $line;
+ continue;
+ } else {
+ $mirr_continent = $m['continent'];
+ }
+ if (false === strpos($m['url'], $common_patern)) {
+ $faults['error: url distrib missing'][] = $line;
+ echo "Url no $common_patern error in line $line";
+ continue;
}
$pu = parse_url($m['url']);
if (in_array($pu['scheme'], array('http', 'https', 'ftp'))) {
$item = array(
- 'zone' => isset($m['zone']) ? $m['zone'] : '?',
- 'country' => isset($m['country']) ? $m['country'] : '?',
- 'city' => isset($m['city']) ? $m['city'] : '-',
+ 'zone' => ('' != $m['zone']) ? $m['zone'] : '?',
+ 'country' => ('' != $m['country']) ? $m['country'] : '?',
+ 'city' => ('' != $m['city']) ? trim($m['city']) : '-',
// BEWARE of the path substitution here. Must match.
- 'url' => str_replace('/distrib/8/i586', '', $m['url'])
+ 'url' => str_replace($common_patern, '', $m['url'])
);
if ($refresh_country_and_city_arrays == true) {
// prepare details for i18n
- if (isset($m['city'])) {
- if (isset($cities_i18n[$m['city']])) {
- $mirror_cities[$m['city']] = $cities_i18n[$m['city']];
+ if ('-' != $item['city']) {
+ if (isset($cities_i18n[$item['city']])) {
+ $mirror_cities[$item['city']] = $cities_i18n[$item['city']];
} else {
- $mirror_cities[$m['city']] = '_r("' . $m['city'] . '") // new city (not yet in /en/downloads/get/lib.php)';
+ $mirror_cities[$item['city']] = '_r("' . $item['city'] . '") // new city (not yet in /en/downloads/get/lib.php)';
}
- } else if ($refresh_country_and_city_arrays == true) {
- // store data in $faults array to easy find location manually with latitude and longitude if needed
- $latitude = $m['latitude'];
- $longitude = $m['longitude'];
- $map_magnification = 5;
- $map_data = "set latitude is $latitude and longitude $longitude";
- $map_data .= " (<a href=\"https://www.openstreetmap.org/search?query=" . $latitude;
- $map_data .= "%20" . $longitude . "#map=" . $map_magnification . "/" . $latitude;
- $map_data .= "/" . $longitude . "\" >OpenStreetMap</a>)";
- $faults[$map_data] = $line;
+ } else {
+ $faults['notice: no city given'][] = $line;
}
if (isset($countries[$item['country']])) {
$mirror_countries[$item['country']] = $countries[$item['country']];
@@ -221,18 +231,17 @@ class Downloads
if ($documentation) {
$test_file = $item['url'].'/doc/mga8/date.txt';
} else if ($mirrorlist) {
- $test_file = $item['url'].'/distrib/8/x86_64/media/core/updates/repodata/repomd.xml';
+ $test_file = $item['url'].'/distrib/8/x86_64/media/core/updates/repodata/repomd.xml'; // when changing, please change $common_patern too
} else {
$test_file = $item['url'].'/iso/8/torrents/Mageia-8-Live-Xfce-i586.torrent';
}
if (false === @file_get_contents($test_file)) {
$num_dn++;
- echo "Down $num_dn (up: $num_up, $mirrs_processed mirrors tested) $test_file \n";
+ echo "Down $num_dn (up: $num_up, about $mirrs_processed mirrors tested) $test_file \n";
} else {
$num_up++;
- echo "Up $num_up (down: $num_dn, $mirrs_processed mirrors tested) $test_file \n";
-// $mirrors[$m['country']][] = $item;
- $mirrors['_C:' . $m['continent']][] = $item;
+ echo "Up $num_up (down: $num_dn, about $mirrs_processed mirrors tested) $test_file \n";
+ $mirrors['_C:' . $mirr_continent][] = $item;
}
}
}
@@ -264,7 +273,7 @@ class Downloads
var_export($mirror_cities);
echo ";\nArray of countries for i18n: ";
var_export($countries_u);
- echo ";\nArray of faults: ";
+ echo ";\nArray of faults and warnings: ";
var_export($faults);
echo ";\n";
}