From 4271523522ac8af56aa0f8ea07dcb698f08a9085 Mon Sep 17 00:00:00 2001 From: filip Date: Wed, 31 Dec 2014 19:15:24 +0100 Subject: speed up our report about differences between Tx and our git for Cauldron resources with separation to languages and resources + some other improvements NOTE: there is a different naming convention between Tx and git for some lanuages: Tx git sr@latin vs. sr@Latn.po sr@latin vs. sr@latin.po is exception in: http://gitweb.mageia.org/software/i18n/system-config-printer/plain/po uz@Cyrl vs. uz@cyrillic.po en_GB vs. en_gb.po is exception in: http://gitweb.mageia.org/web/identity/plain/lib/CatDap/I18N Should we do some cleanup? --- langs/report_tx_git.php | 280 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 240 insertions(+), 40 deletions(-) (limited to 'langs/report_tx_git.php') diff --git a/langs/report_tx_git.php b/langs/report_tx_git.php index 19761b141..4b8df4828 100644 --- a/langs/report_tx_git.php +++ b/langs/report_tx_git.php @@ -42,6 +42,20 @@ if (in_array($c, array('Webpages', 'Documentation', 'Cauldron'))) { } else { $resource_type = 'Webpages'; // default } +if (isset($_GET['l'])) { + $wanted_language = strip_tags(trim($_GET['l'])); + $wanted_lang_name = get_language_name($wanted_language) . " ("; + $wanted_lang_name .= build_transifex_link($wanted_language, NULL, $resource_type) . ")"; +} else { + $wanted_language = NULL; +} +// NOTE: there is a different naming convention between Tx and git for some lanuages: +// Tx git +// sr@latin vs. sr@Latn.po +// sr@latin vs. sr@latin.po exception: http://gitweb.mageia.org/software/i18n/system-config-printer/plain/po +// uz@Cyrl vs. uz@cyrillic.po +// en_GB vs. en_gb.po exception: http://gitweb.mageia.org/web/identity/plain/lib/CatDap/I18N +$wanted_resource = (isset($_GET['r']) ? strip_tags(trim($_GET['r'])) : NULL); if ('Documentation' == $resource_type) { $git_resources = array( @@ -194,9 +208,25 @@ if ('Documentation' == $resource_type) { 'tx_name' => 'userdrake2', 'git_path' => 'http://gitweb.mageia.org/software/userdrake/plain/po', ), - ); // identity-catdap, indexhtml_about, isodumper + array( + 'pot_name' => 'messages', + 'tx_name' => 'identity-catdap', + 'git_path' => 'http://gitweb.mageia.org/web/identity/plain/lib/CatDap/I18N', + ), + array( + 'pot_name' => 'index', + 'tx_name' => 'indexhtml_about', + 'git_path' => 'http://gitweb.mageia.org/software/indexhtml/plain/about/po', + ), + array( + 'pot_name' => 'isodumper', + 'tx_name' => 'isodumper', + 'git_path' => 'https://github.com/papoteur-mga/isodumper/raw/master/po', + ), + ); } - +$isodumper_note = "Check "; +$isodumper_note .= "isodumper resource manualy as it's in github so it can't be handled properly with this report."; /** * Transifex API implementation in php detailed on http://docs.transifex.com/developer/api/projects @@ -226,7 +256,8 @@ function tx_call($tx_request, $project = 'project/mageia/') $errors['tx_json_error'] = "$error Please reload the page and report this on mailing list if it persist."; $tx_result_array = array(); } else if (FALSE === $tx_result) { - $errors['tx_call'] = "API call to Transifex $tx_request failed. Please reload the page and report this on mailing list if it persist."; + $error = "API call to Transifex $tx_request failed."; + $errors['tx_call'] = "$error Please reload the page and report this on mailing list if it persist."; $tx_result_array = array(); } return $tx_result_array; @@ -246,7 +277,9 @@ function tx_call($tx_request, $project = 'project/mageia/') */ function build_links($git_resource_name, $language_code, $resource_type, $stat_data) { - $tx_resource_name = str_replace(array('/', 'mageia-welcome'), array('-', 'Mageia%20Welcome'), $git_resource_name); + $search_array = array('/', 'mageia-welcome', 'identity-catdap'); + $replace_array = array('-', 'Mageia%20Welcome', 'Identity%20(CatDap)'); + $tx_resource_name = str_replace($search_array, $replace_array, $git_resource_name); $locale_name = locale_underscore_to_hyphen($language_code); if ('Webpages' == $resource_type) { if ('nav' == $git_resource_name) { @@ -255,10 +288,19 @@ function build_links($git_resource_name, $language_code, $resource_type, $stat_d $git_link = sprintf('http://gitweb.mageia.org/web/www/tree/langs/%s/%s.po', $locale_name, $git_resource_name); } } else { - $git_link = sprintf('%s/%s.po', $stat_data['git_path'], $language_code); + // fixing exceptions as there is a different naming convention between Tx and git for some lanuages: + if ('system-config-printer' == $git_resource_name && 'sr@latin' == $language_code) { + $lang_code = 'sr@latin'; + } else if ('identity-catdap' == $git_resource_name && 'en_GB' == $language_code) { + $lang_code = 'en_gb'; + } else { + $tx_array = array('sr@latin', 'uz@Cyrl'); + $git_array = array('sr@Latn', 'uz@cyrillic'); + $lang_code = str_replace($tx_array, $git_array, $language_code); + } + $git_link = sprintf('%s/%s.po', $stat_data['git_path'], $lang_code); } - $links_and_num = "Tx: "; + $links_and_num = build_transifex_link($language_code, 'Tx', $resource_type, $tx_resource_name) . ": "; $links_and_num .= $stat_data['tx_untran']; $links_and_num .= (0 == $stat_data['tx_untran'] ? ' - full' : ''); $links_and_num .= ', git: '; @@ -268,6 +310,37 @@ function build_links($git_resource_name, $language_code, $resource_type, $stat_d return $links_and_num; } +/** + * Build customized Transifex link + * + * @param string $tx_language_code like 'pt_BR' + * @param string $link_name is the visible link name + * @param string $tx_category like 'Webpages' + * @param string $tx_resource_name like 'page-4' + * + * @return string customized Transifex link +*/ +function build_transifex_link($tx_language_code, $link_name = NULL, $tx_category = NULL, $tx_resource_name = NULL) +{ + $prefix = ""; + if(is_null($link_name)) { + $transifex_url .= $tx_language_code; + } else { + $transifex_url .= $link_name; + } + $transifex_url .= ""; + + return $transifex_url; +} + /** * Generating report about git resources statistics * @@ -304,8 +377,18 @@ function generating_report($language_codes, $resource_names, $path = NULL, $pot_ $source_strings = $stat['source_strings']; // unify resource names, navigation is a special exception $resource_name = str_replace(array('../_nav/langs/en', 'en/', '.pot'), array('nav', '', ''), $f); - // create pt_BR from pt-br and alike to unify languages - $web_language_code = locale_hyphen_underscore($l, true); + // fixing exceptions as there is a different naming convention between Tx and git for some lanuages: + if ('system-config-printer' == $resource_name && 'sr@latin' == $l) { + $web_language_code = 'sr@latin'; + } else if ('identity-catdap' == $resource_name && 'en_gb' == $l) { + $web_language_code = 'en_GB'; + } else { + $git_array = array('sr@Latn', 'uz@cyrillic'); + $tx_array = array('sr@latin', 'uz@Cyrl'); + // create pt_BR from pt-br and alike to unify languages + $web_language_code = locale_hyphen_underscore($l, true); + $web_language_code = str_replace($git_array, $tx_array, $web_language_code); + } $num_of_not_fully_trans = $num_of_fuzzy_or_missing + $num_of_untranslated; $report[] = array( 'num_of_all_strings' => $stat['a'], @@ -337,7 +420,8 @@ function get_language_name($language_code) if (is_null($tx_languages_details)) { $tx_languages_details = tx_call("languages", ''); } - $key_exists = recursive_array_search($language_code, $tx_languages_details); // is language code in the $tx_languages_details array? + // is language code in the $tx_languages_details array? + $key_exists = recursive_array_search($language_code, $tx_languages_details); if ($key_exists !== FALSE) { $language_name = $tx_languages_details[$key_exists]['name']; } else { @@ -351,20 +435,39 @@ function get_language_name($language_code) $tx_resources = tx_call("resources"); // create separate array ($tx_resources_info) and add statistics to it from TX -$tx_resources_info = array(); +$tx_resources_info = array(); +$tx_cauldron_resources = array(); foreach ($tx_resources as $one_resource) { $tx_resource_name = $one_resource['slug']; $category = $one_resource['categories'][0]; + if ('Cauldron' == $category) { + $tx_cauldron_resources[] = $tx_resource_name; + } // limit resource type only on one if ($category == $resource_type) { - // add statistic - $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); - $tx_resource_name = str_replace('page-', '', $tx_resource_name); // unify resource names - $one_resource['tx_resource_name'] = $tx_resource_name; - $one_resource['statistic'] = $tx_stat_for_resource; - $tx_resources_info[] = $one_resource; + if (is_null($wanted_resource) || $tx_resource_name == $wanted_resource) { + // add statistic + if (is_null($wanted_language)) { + $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); + } else { + $tx_stat_for_resource[$wanted_language] = tx_call("resource/$tx_resource_name/stats/$wanted_language"); + } + $tx_resource_name = str_replace('page-', '', $tx_resource_name); // unify resource names + $one_resource['tx_resource_name'] = $tx_resource_name; + $one_resource['statistic'] = $tx_stat_for_resource; + $tx_resources_info[] = $one_resource; + } } } +$tx_languages = array(); +if (is_null($wanted_language)) { + foreach ($tx_stat_for_resource as $tx_resource_language => $tx_resource_language_stat) { + $tx_languages[] = $tx_resource_language; + } +} else { + $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); + $tx_languages = array_keys($tx_stat_for_resource); +} // create array ($report) with statistics from git $report = array(); @@ -373,13 +476,34 @@ if ('Webpages' == $resource_type) { $report = generating_report(get_other_langs(), $enFiles); } else { foreach ($git_resources as $git_resource_details) { - $git_path = $git_resource_details['git_path']; $tx_name = $git_resource_details['tx_name']; + if (!is_null($wanted_resource) && $tx_name != $wanted_resource) { + continue; + } + $git_path = $git_resource_details['git_path']; $pot_name = $git_resource_details['pot_name']; $raw_html_dump = file_get_contents($git_path); + if (FALSE === $raw_html_dump) { + $errors['file_get_contents_failed'] = "Access to $git_path failed."; + } // list all po files from links within $raw_html_dump - preg_match_all("/('>)([a-z_A-Z]+)(\.po<)/", $raw_html_dump, $language_codes); - $one_report = generating_report(array_values($language_codes[2]), array($tx_name), $git_path, $pot_name); + preg_match_all("/('>)([a-z_A-Z@-]+)(\.po<)/", $raw_html_dump, $language_codes); + $git_language_codes = $language_codes[2]; + if (is_null($wanted_language)) { + $report_language_codes = $git_language_codes; + } else { + if (in_array($wanted_language, $git_language_codes)) { + $report_language_codes = array($wanted_language); + } else { + $report_language_codes = array(); + $note = ''; + if ('isodumper' == $tx_name) { + $note = $isodumper_note; + } + $errors['lang_not_in_git'] = "Language $wanted_lang_name is not present in git for $tx_name resource. $note"; + } + } + $one_report = generating_report(array_values($report_language_codes), array($tx_name), $git_path, $pot_name); $report = array_merge($one_report, $report); } } @@ -442,7 +566,8 @@ foreach ($tx_resources_info as $tx_resource_info) { // this bellow should normaly only happen inside of a tx sync window so remember it } else if ($tx_resource_all_strings_in_lang != $git_resource_num_of_all_strings) { if (!array_key_exists($git_resource_name, $nonequal_num_of_all_strings)) { - $nonequal_num_of_all_strings[$git_resource_name] = "$git_resource_name git:$git_resource_num_of_all_strings/tx:$tx_resource_all_strings_in_lang"; + $nonequal_num_text = "$git_resource_name git:$git_resource_num_of_all_strings/tx:$tx_resource_all_strings_in_lang"; + $nonequal_num_of_all_strings[$git_resource_name] = $nonequal_num_text; } } } @@ -465,7 +590,16 @@ ksort($tx_all_langs_above_treshold); // sort by lang sort($git_all_languages); sort($nonequal_num_of_all_strings, SORT_STRING); -$tx_only_resources = array_diff($tx_all_resource_names, $git_compare_resources); +$tx_all_resources_above_treshold = array(); +foreach ($tx_all_langs_above_treshold as $tx_only_language => $resource_above_treshold_for_lang) { + foreach ($resource_above_treshold_for_lang as $resource_above_treshold) { + $tx_all_resources_above_treshold[] = $resource_above_treshold; + } +} +$tx_all_resources_above_treshold = array_unique($tx_all_resources_above_treshold); +sort($tx_all_resources_above_treshold, SORT_STRING); + +$tx_only_resources = array_diff($tx_all_resources_above_treshold, $git_compare_resources); $git_only_resources = array_diff($git_compare_resources, $tx_all_resource_names); $tx_only_languages = array_diff(array_keys($tx_all_langs_above_treshold), $git_all_languages); $git_only_languages = array_diff($git_all_languages, $tx_all_languages); @@ -473,19 +607,32 @@ $git_only_languages = array_diff($git_all_languages, $tx_all_languages); // preparing text for languages not present yet in our git repositories $tx_only_languages_details = ''; if (0 < count($tx_only_languages)) { - $tx_only_languages_details = "

Languages with at least one resource for $resource_type translated over treshold "; + if (!is_null($wanted_language) && !is_null($wanted_resource)) { + $tx_only_languages_details = "

Language $wanted_lang_name with $wanted_resource resource for $resource_type translated over treshold "; + } else if (!is_null($wanted_language) && is_null($wanted_resource)) { + $tx_only_languages_details = "

Language $wanted_lang_name with at least one resource for $resource_type translated over treshold "; + } else if (is_null($wanted_language) && !is_null($wanted_resource)) { + $tx_only_languages_details = "

Languages with $wanted_resource resource for $resource_type translated over treshold "; + } else { + $tx_only_languages_details = "

Languages with at least one resource for $resource_type translated over treshold "; + } $tx_only_languages_details .= "($tx_lang_completed_treshold %) but not present yet in our git:

'; } @@ -497,8 +644,19 @@ if (0 < count($git_only_languages)) { } if (0 < count($tx_only_resources)) { - $error = "Some resources (" . implode(", ", $tx_only_resources) . ") are present only in Transifex"; - $errors['tx_only_resources'] = "$error but not in our git. Please report that."; + if (!is_null($wanted_language)) { + $tx_only_resources_array = array(); + foreach ($tx_only_resources as $tx_only_resource) { + $tx_only_resource_text = build_transifex_link($wanted_language, $tx_only_resource, $resource_type, $tx_only_resource); + $tx_only_resources_array[] = $tx_only_resource_text; + } + $tx_only_resources_text = implode(", ", $tx_only_resources_array); + } else { + $tx_only_resources_text = implode(", ", $tx_only_resources); + } + $error = "Some resources ($tx_only_resources_text) are above treshold "; + $error .= "($tx_lang_completed_treshold %) and still present only in Transifex"; + $errors['tx_only_resources'] = "$error but not in our git. Please report that. $isodumper_note"; } if (0 < count($git_only_resources)) { @@ -511,13 +669,16 @@ if (0 < count($parse_errors)) { foreach ($parse_errors as $single_git_resource_name => $git_resource_languages) { $errors[] = "$single_git_resource_name.po (languages: " . implode(", ", $git_resource_languages) . ")"; } - $errors['po_file_parse'] = "It seems that parsing of some resources failed: " . implode(", ", $errors) . "."; + $reload = "Please reload the page and report this on mailing list if it persist."; + $errors['po_file_parse'] = "It seems that parsing of some resources failed: " . implode(", ", $errors) . ". $reload"; } if (0 < count($nonequal_num_of_all_strings)) { - $error = "Some resources (" . implode(", ", $nonequal_num_of_all_strings) . ") have different number of all strings"; - $bug = "Note that there is a bug which influence proper parsing of msec and mageia-kde-translation."; - $errors['nonequal_num_of_all_strings'] = "$error between our git and Transifex. Please report that on the mailing list if it happens across several days. $bug"; + $error = "Some resources (" . implode(", ", $nonequal_num_of_all_strings) . ") "; + $error .= "have different number of all strings between our git and Transifex."; + $bug = "Note that there is a "; + $bug .= "known bug which influence proper parsing of msec and mageia-kde-translation."; + $errors['nonequal_num_of_all_strings'] = "$error Please report that on the mailing list if it happens across several days. $bug"; } // making a list of resources with Tx/git differences @@ -655,13 +816,31 @@ if ('Documentation' != $resource_type) { if ('Cauldron' != $resource_type) { // $links[] = 'software translations'; } -$reports_links = '

You can also see reports about differences in ' . implode(" or ", $links) . '. And please don\'t forget great '; -$reports_links .= 'wiki page about Transifex and git syncing.

'; + +// list $tx_languages and $tx_cauldron_resources +$languages_links = array(); +foreach ($tx_languages as $tx_language) { + $language_name = get_language_name($tx_language); + $languages_links[] = "$tx_language"; +} +sort($languages_links, SORT_STRING); +$languages_link = implode(", ", $languages_links); +$resources_links = array(); +foreach ($tx_cauldron_resources as $tx_resource_name) { + $resources_links[] = "$tx_resource_name"; +} +$resources_link = implode(", ", $resources_links); + +$reports_links = '

You can also see reports about differences in ' . implode(" or ", $links) . '.

'; +$reports_links .= "

For software translations please choose language ($languages_link) or resource ($resources_link).

"; +$reports_links .= '

There is a nice '; +$reports_links .= 'wiki page about Transifex and git synchronising.

'; echo $reports_links . PHP_EOL; // print out any errors if (0 < count($errors)) { - $error_text = '