From a7dd5743f97aa92d94424651ad532c65002e1fea Mon Sep 17 00:00:00 2001 From: filip Date: Sun, 14 Mar 2021 23:29:23 +0100 Subject: more robust parsing of mirror list with improved error testing and reporting --- lib/Downloads.php | 87 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/lib/Downloads.php b/lib/Downloads.php index 4ca48383f..c31381d59 100644 --- a/lib/Downloads.php +++ b/lib/Downloads.php @@ -164,52 +164,62 @@ class Downloads } $num_of_all_mirrs = count($data); $num_of_tested_mirrs = 0; + $common_patern = '/distrib/8/i586'; + // this huge regex magic achieved with a lot of help from great https://regex101.com/ + $single_mirror_parsing_regex = '/\s*continent\s*=\s*(?<continent>\w*)\s*,\s*'; + $single_mirror_parsing_regex .= '\s*zone\s*=\s*(?<zone>\w*)\s*,\s*'; + $single_mirror_parsing_regex .= '\s*country\s*=\s*(?<country>\w*)\s*,\s*'; + $single_mirror_parsing_regex .= '(?:\s*city\s*=\s*(?<city>[\S ,]*)\s*,\s*)?'; + $single_mirror_parsing_regex .= '\s*latitude\s*=\s*[-]?\d*\.?\d*\s*,\s*'; + $single_mirror_parsing_regex .= '\s*longitude\s*=\s*[-]?\d*\.?\d*\s*,\s*'; + $single_mirror_parsing_regex .= '\s*version\s*=\s*\w*\s*,\s*arch\s*=\s*\w*\s*,\s*type\s*=\s*\w*\s*,\s*'; + $single_mirror_parsing_regex .= '\s*url\s*=\s*(?<url>\S*)\s*/m'; foreach ($data as $line) { $num_of_tested_mirrs++; $mirrs_processed = sprintf("%.0f %%", $num_of_tested_mirrs / $num_of_all_mirrs * 100); - $line = explode(',', trim($line)); - $m = array(); - foreach ($line as $val) { - $val = explode('=', trim($val)); - if (!empty($val[1])) { - $m[$val[0]] = $val[1]; - } else { - // workaround to add second values with unescaped comma in csv line to the previous (valid) value - end($m); - $m[key($m)] = $m[key($m)] . ", " . 
$val[0]; - if ($refresh_country_and_city_arrays == true) { - // store $line to examine faults - $faults[] = $line; - } - } + $regex_error = preg_match_all($single_mirror_parsing_regex, $line, $matches, PREG_SET_ORDER, 0); + if (false === $regex_error) { + $faults['error: regex parse failed'][] = $line; + echo 'Regex parse failed error in line ' . $line; + continue; + } + $m = $matches[0]; + if ('' == $m['url']) { + $faults['error: parse url'][] = $line; + echo 'Url parse error in line ' . $line; + continue; + } + if ('' == $m['continent']) { + $faults['error: parse continent'][] = $line; + echo 'Continent parse error in line ' . $line; + continue; + } else { + $mirr_continent = $m['continent']; + } + if (false === strpos($m['url'], $common_patern)) { + $faults['error: url distrib missing'][] = $line; + echo "Url no $common_patern error in line $line"; + continue; } $pu = parse_url($m['url']); if (in_array($pu['scheme'], array('http', 'https', 'ftp'))) { $item = array( - 'zone' => isset($m['zone']) ? $m['zone'] : '?', - 'country' => isset($m['country']) ? $m['country'] : '?', - 'city' => isset($m['city']) ? $m['city'] : '-', + 'zone' => ('' != $m['zone']) ? $m['zone'] : '?', + 'country' => ('' != $m['country']) ? $m['country'] : '?', + 'city' => ('' != $m['city']) ? trim($m['city']) : '-', // BEWARE of the path substitution here. Must match. - 'url' => str_replace('/distrib/8/i586', '', $m['url']) + 'url' => str_replace($common_patern, '', $m['url']) ); if ($refresh_country_and_city_arrays == true) { // prepare details for i18n - if (isset($m['city'])) { - if (isset($cities_i18n[$m['city']])) { - $mirror_cities[$m['city']] = $cities_i18n[$m['city']]; + if ('-' != $item['city']) { + if (isset($cities_i18n[$item['city']])) { + $mirror_cities[$item['city']] = $cities_i18n[$item['city']]; } else { - $mirror_cities[$m['city']] = '_r("' . $m['city'] . '") // new city (not yet in /en/downloads/get/lib.php)'; + $mirror_cities[$item['city']] = '_r("' . $item['city'] . 
'") // new city (not yet in /en/downloads/get/lib.php)'; } - } else if ($refresh_country_and_city_arrays == true) { - // store data in $faults array to easy find location manually with latitude and longitude if needed - $latitude = $m['latitude']; - $longitude = $m['longitude']; - $map_magnification = 5; - $map_data = "set latitude is $latitude and longitude $longitude"; - $map_data .= " (OpenStreetMap)"; - $faults[$map_data] = $line; + } else { + $faults['notice: no city given'][] = $line; } if (isset($countries[$item['country']])) { $mirror_countries[$item['country']] = $countries[$item['country']]; @@ -221,18 +231,17 @@ class Downloads if ($documentation) { $test_file = $item['url'].'/doc/mga8/date.txt'; } else if ($mirrorlist) { - $test_file = $item['url'].'/distrib/8/x86_64/media/core/updates/repodata/repomd.xml'; + $test_file = $item['url'].'/distrib/8/x86_64/media/core/updates/repodata/repomd.xml'; // when changing, please change $common_patern too } else { $test_file = $item['url'].'/iso/8/torrents/Mageia-8-Live-Xfce-i586.torrent'; } if (false === @file_get_contents($test_file)) { $num_dn++; - echo "Down $num_dn (up: $num_up, $mirrs_processed mirrors tested) $test_file \n"; + echo "Down $num_dn (up: $num_up, about $mirrs_processed mirrors tested) $test_file \n"; } else { $num_up++; - echo "Up $num_up (down: $num_dn, $mirrs_processed mirrors tested) $test_file \n"; -// $mirrors[$m['country']][] = $item; - $mirrors['_C:' . $m['continent']][] = $item; + echo "Up $num_up (down: $num_dn, about $mirrs_processed mirrors tested) $test_file \n"; + $mirrors['_C:' . $mirr_continent][] = $item; } } } @@ -264,7 +273,7 @@ class Downloads var_export($mirror_cities); echo ";\nArray of countries for i18n: "; var_export($countries_u); - echo ";\nArray of faults: "; + echo ";\nArray of faults and warnings: "; var_export($faults); echo ";\n"; } -- cgit v1.2.1