From 23f94e83c7fc5b0b14ff759db2339acf3148fd19 Mon Sep 17 00:00:00 2001 From: filip Date: Sun, 4 Sep 2016 00:21:41 +0200 Subject: added string comparison between git and TX (currently for one file only) http://mag/langs/report_tx_git.php?c=Webpages&l=sl&r=en/404.pot http://mag/langs/report_tx_git.php?c=Documentation&l=ro&r=doc_installer http://mag/langs/report_tx_git.php?c=Cauldron&l=de&r=drakx_standalone Note that mageiasync (not in gettext format) and page-nav (different location & filename) are not yet working correctly. Some links for calling the comparison will be needed too. --- langs/report_tx_git.php | 192 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 149 insertions(+), 43 deletions(-) (limited to 'langs/report_tx_git.php') diff --git a/langs/report_tx_git.php b/langs/report_tx_git.php index 6bd3732ab..840e381bb 100644 --- a/langs/report_tx_git.php +++ b/langs/report_tx_git.php @@ -36,9 +36,9 @@ define('HLANG', TRUE); include 'lib.php'; $errors = array(); // stored for error management ;) -$c = get_sane_string('c'); -if (in_array($c, array('Webpages', 'Documentation', 'Cauldron'))) { - $resource_type = $c; // filter only valid resource types +$wanted_category = get_sane_string('c'); +if (in_array($wanted_category, array('Webpages', 'Documentation', 'Cauldron'))) { + $resource_type = $wanted_category; // filter only valid resource types } else { $resource_type = 'Webpages'; // default } @@ -55,6 +55,10 @@ if (!empty($wanted_language)) { // uz@Cyrl vs. uz@cyrillic.po // en_GB vs. 
en_gb.po exception: http://gitweb.mageia.org/web/identity/plain/lib/CatDap/I18N $wanted_resource = get_sane_string('r'); +$wanted_resource_name = ''; +if (!empty($wanted_resource)) { + $wanted_resource_name = str_replace(array('../_nav/langs/en', 'en/', '.pot'), array('nav', '', ''), $wanted_resource); +} if ('Documentation' == $resource_type) { $git_resources = array( @@ -280,21 +284,16 @@ function tx_call($tx_request, $project = 'project/mageia/') * * @return string $resource_name like 'page-about-constitution' (tx name) */ -function resource_name_conversion($resource_name, $category = '', $tx_to_git_name_conversion = FALSE) +function resource_name_conversion($resource_name, $category = '') { - $tx_names = array('/', 'mageia-welcome', 'identity-catdap'); - $git_names = array('-', 'Mageia%20Welcome', 'Identity%20(CatDap)'); - if ($tx_to_git_name_conversion) { - $resource_name = str_replace($tx_names, $git_names, $resource_name); - } else { - $resource_name = str_replace($git_names, $tx_names, $resource_name); - } if ('Webpages' == $category) { - if ($tx_to_git_name_conversion) { - $resource_name = str_replace('page-', '', $resource_name); - } else { - $resource_name = 'page-' . $resource_name; - } + $tx_names = array('nav', '-'); + $git_names = array('_nav/langs/en', '/'); + $resource_name = 'page-' . 
str_replace($git_names, $tx_names, $resource_name); + } else if ('Cauldron' == $category) { + $tx_names = array('mageia-welcome', 'identity-catdap'); + $git_names = array('Mageia%20Welcome', 'Identity%20(CatDap)'); + $resource_name = str_replace($git_names, $tx_names, $resource_name); } return $resource_name; @@ -395,7 +394,7 @@ function build_transifex_link($tx_language_code, $link_name = NULL, $tx_category * * @return array */ -function generating_report($language_codes, $resource_names, $path = NULL, $pot_name = NULL) +function generating_report($language_codes, $resource_names, $path = NULL, $pot_name = NULL, $compared = NULL) { $report = array(); foreach ($resource_names as $f) { @@ -417,7 +416,7 @@ function generating_report($language_codes, $resource_names, $path = NULL, $pot_ $partial_pot_name = substr($pot_name, 0, -6); // cuts '_en.ts' from mageiaSync_en.ts $stat = _ts_diff($l, $partial_pot_name, $source_strings, $path); } else { - $stat = _po_diff($l, $resource, $source_strings, $path); + $stat = _po_diff($l, $resource, $source_strings, $path, $compared); } $num_of_fuzzy_or_missing = count($stat['fuzzy_or_missing']); } else { // file $langF doesn't exists in 'Webpages' $resource_type @@ -443,6 +442,10 @@ function generating_report($language_codes, $resource_names, $path = NULL, $pot_ $num_of_not_fully_trans = $num_of_fuzzy_or_missing + $num_of_untranslated; $report[] = array( 'num_of_all_strings' => $stat['a'], +// 'fuzzy_or_missing_str' => $stat['fuzzy_or_missing'], +// 'untranslated_strings' => $stat['notrans'], +// 'source_strings' => $stat['source_strings'], + 'differences' => $stat['differences'], 'resource_name' => $resource_name, 'web_language_code' => $web_language_code, 'num_of_not_fully_trans' => $num_of_not_fully_trans, @@ -486,36 +489,74 @@ function get_language_name($language_code) // get resources data from TX $tx_resources = tx_call("resources"); +$details_wanted = FALSE; +if (!empty($wanted_language) && !empty($wanted_resource_name)) { + 
$details_wanted = TRUE; +} + +if ($details_wanted) { + $prefixed_resource_name = resource_name_conversion($wanted_resource_name, $wanted_category); + $tx_file = tx_call("resource/$prefixed_resource_name/translation/$wanted_language/"); + $tx_file_content = $tx_file['content']; + $tx_file_content = explode("\n", $tx_file_content); + $parsed_tx_file_content = phpmo_parse_po_file($tx_file_content, FALSE); +} else { + $parsed_tx_file_content = NULL; +} + // create separate array ($tx_resources_info) and add statistics to it from TX $tx_resources_info = array(); $tx_cauldron_resources = array(); $tx_stat_for_resource = array(); foreach ($tx_resources as $one_resource) { - $tx_resource_name = $one_resource['slug']; - $category = $one_resource['categories'][0]; + $tx_resource_name = $one_resource['slug']; + $category = $one_resource['categories'][0]; + $add_data = FALSE; if ('Cauldron' == $category) { $tx_cauldron_resources[] = $tx_resource_name; } - // limit resource type only on one + // limit resource type to one if ($category == $resource_type) { - if (empty($wanted_resource) || $tx_resource_name == $wanted_resource) { - // add statistic - if (empty($wanted_language)) { - $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); - } else { - $tx_stat_for_resource[$wanted_language] = tx_call("resource/$tx_resource_name/stats/$wanted_language"); + if ('Cauldron' == $category) { // add data for Cauldron category + // if name is required + if (!empty($wanted_resource) && $tx_resource_name == resource_name_conversion($wanted_resource_name, $category)) { + $add_data = TRUE; + // if only language is required + } else if (!empty($wanted_language) && empty($wanted_resource)) { + $add_data = TRUE; } - $tx_resource_name = str_replace('page-', '', $tx_resource_name); // unify resource names - $one_resource['tx_resource_name'] = $tx_resource_name; - $one_resource['statistic'] = $tx_stat_for_resource; - $tx_resources_info[$tx_resource_name] = $one_resource; + } else { // 
add data for Webpages and Documentation category + if (empty($wanted_resource) && empty($wanted_language)) { + $add_data = TRUE; + } + } + // add data if details are needed + if ($details_wanted && $tx_resource_name == resource_name_conversion($wanted_resource_name, $category)) { + $add_data = TRUE; + } + } + if ($add_data) { + // add statistic + if (empty($wanted_language)) { + $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); + } else { + $tx_stat_for_resource[$wanted_language] = tx_call("resource/$tx_resource_name/stats/$wanted_language"); } + $tx_resource_name = str_replace('page-', '', $tx_resource_name); // unify resource names + $one_resource['tx_resource_name'] = $tx_resource_name; + $one_resource['statistic'] = $tx_stat_for_resource; + $tx_resources_info[$tx_resource_name] = $one_resource; } } $tx_languages = array(); if (empty($wanted_language)) { - foreach ($tx_stat_for_resource as $tx_resource_language => $tx_resource_language_stat) { - $tx_languages[] = $tx_resource_language; + if ('Webpages' == $resource_type || 'Documentation' == $resource_type) { + $tx_stat_for_resource = tx_call("resource/page-index/stats"); + $tx_languages = array_keys($tx_stat_for_resource); + } else { + foreach ($tx_stat_for_resource as $tx_resource_language => $tx_resource_language_stat) { + $tx_languages[] = $tx_resource_language; + } } } else { $tx_stat_for_resource = tx_call("resource/$tx_resource_name/stats"); @@ -525,8 +566,13 @@ if (empty($wanted_language)) { // create array ($report) with statistics from git $report = array(); if ('Webpages' == $resource_type) { - $enFiles = array_merge(array('../_nav/langs/en.pot'), get_lang_references('*.pot')); // added navigation file - $report = generating_report(get_other_langs(), $enFiles); + if (empty($wanted_resource) && empty($wanted_language)) { + $enFiles = array_merge(array('../_nav/langs/en.pot'), get_lang_references('*.pot')); // added navigation file + $report = generating_report(get_other_langs(), 
$enFiles); + } else { + $enFiles = array($wanted_resource); + $report = generating_report(array($wanted_language), $enFiles, NULL, NULL, $parsed_tx_file_content); + } } else { $tx_resources_not_yet_in_git = array(); $translated_in_tx = ' translated in Transifex'; @@ -564,7 +610,7 @@ if ('Webpages' == $resource_type) { $errors['lang_not_in_git'] = "Language $wanted_lang_name is not present in git for $resources_not_yet_in_git resource."; } } - $one_report = generating_report(array_values($report_language_codes), array($tx_name), $git_path, $pot_name); + $one_report = generating_report(array_values($report_language_codes), array($tx_name), $git_path, $pot_name, $parsed_tx_file_content); $report = array_merge($one_report, $report); } } @@ -614,14 +660,16 @@ foreach ($tx_resources_info as $tx_resource_info) { $git_resource_untrans_in_lang = $git_resource_info['num_of_not_fully_trans']; $git_resource_path = $git_resource_info['webgit_path']; $git_resource_pot_name = $git_resource_info['pot_name']; + $git_resource_differences = $git_resource_info['differences']; - // compare l10n level - if ($tx_resource_untrans_in_lang != $git_resource_untrans_in_lang) { + // compare l10n level or differences + if ($tx_resource_untrans_in_lang != $git_resource_untrans_in_lang || count($git_resource_differences) != 0) { $tx_git_difference[$tx_resource_language][$git_resource_name]['tx_num_of_all_strings'] = $tx_resource_all_strings_in_lang; $tx_git_difference[$tx_resource_language][$git_resource_name]['tx_untranslated'] = $tx_resource_untrans_in_lang; $tx_git_difference[$tx_resource_language][$git_resource_name]['git_untranslated'] = $git_resource_untrans_in_lang; $tx_git_difference[$tx_resource_language][$git_resource_name]['webgit_path'] = $git_resource_path; $tx_git_difference[$tx_resource_language][$git_resource_name]['pot_name'] = $git_resource_pot_name; + $tx_git_difference[$tx_resource_language][$git_resource_name]['differences'] = $git_resource_differences; } // it seems like a 
parse error if (0 == $git_resource_num_of_all_strings) { @@ -743,6 +791,58 @@ $all_resources_with_tx_git_diff = array_unique($all_resources_with_tx_git_diff); sort($all_resources_with_tx_git_diff, SORT_STRING); $num_of_resources_w_difference = count($all_resources_with_tx_git_diff); +// preparing a detailed strings difference html table represantation between git and tx +if ($details_wanted) { + $table_rows = array(); + $table_rows[] = "" . PHP_EOL; + $table_rows[] = "" . PHP_EOL; + $table_rows[] = "" . PHP_EOL; // add header row + $table_rows[] = "" . PHP_EOL; + $table_rows[] = "" . PHP_EOL; + $num_of_differences = 0; + $col_repeat = 8; + $i = 0; + foreach ($tx_git_difference as $one_language => $one_language_array) { + $differences = $one_language_array[$wanted_resource_name]['differences']; + $num_of_differences = count($differences); + foreach ($differences as $curr_msgid => $curr_context_array) { + foreach ($curr_context_array as $curr_context_or_num => $array_for_compare) { + if (is_string($curr_context_or_num)) { + $curr_msgid .= " (Context: $curr_context_or_num)"; + } + $curr_msgid = htmlentities($curr_msgid, null, 'UTF-8'); + $num_of_tr_str = max(count($array_for_compare['target_l']), count($array_for_compare['compared'])) - 1; + for ($i = 0; $i <= $num_of_tr_str; $i++) { + if (empty($array_for_compare['target_l'][$i])) { + $target_l_str = ' '; + } else { + $target_l_str = htmlentities($array_for_compare['target_l'][$i], null, 'UTF-8'); + } + if (empty($array_for_compare['compared'][$i])) { + $compared_str = ' '; + } else { + $compared_str = htmlentities($array_for_compare['compared'][$i], null, 'UTF-8'); + } + $row = ""; + $row .= ""; + $row .= ""; + $table_rows[] = $row . 
PHP_EOL; + // instead of msgid use msgid_plural for plural table lines + if (is_array($array_for_compare['target_l'])) { + if (array_key_exists('msgid_plural', $array_for_compare['target_l'])) { + $curr_msgid = str_replace($curr_msgid, $array_for_compare['target_l']['msgid_plural'], $curr_msgid); + } + } + } + } + } + } + $table_rows[] = "" . PHP_EOL; + $table_rows[] = "
Original string | Translated string from git | And translated string from Transifex
$curr_msgid | $target_l_str | $compared_str
" . PHP_EOL; + + $table_w_details = (0 != $num_of_differences ? implode($table_rows) : ''); +} + // preparing a html table represantation of git/tx difference in number of untranslated strings $table_difference = ''; $num_for_switch_table_to_list = 15; @@ -755,12 +855,13 @@ if (0 < $num_of_resources_w_difference && $num_for_switch_table_to_list > $num_o $table_rows[] = "" . PHP_EOL; $table_rows[] = "" . PHP_EOL; $first_row_pass = TRUE; + $col_repeat = 8; foreach ($tx_git_difference as $one_language => $one_language_array) { $j = 0; $row = array(); $first_col_pass = TRUE; if ($first_row_pass || 0 == $i % 12) { - $array_chunks = array_chunk($all_resources_with_tx_git_diff, $col_repeat = 8, TRUE); + $array_chunks = array_chunk($all_resources_with_tx_git_diff, $col_repeat, TRUE); foreach ($array_chunks as $array_chunk) { $row = array_merge($row, array(' '), $array_chunk); } @@ -808,7 +909,7 @@ if (0 < $num_of_resources_w_difference && $num_for_switch_table_to_list > $num_o $list_of_lang_diff = ''; $list_of_resource_diff = ''; $list_num_diff = FALSE; -if (0 < $num_of_resources_w_difference && !$table_num_diff) { +if (0 < $num_of_resources_w_difference && !$details_wanted && !$table_num_diff) { $list_num_diff = TRUE; // preparing a list represantation of Tx/git differences by language $list_of_lang_diff .= '