From f9921a191dcd862c72fd1087da6391cc04e5099a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Fri, 25 Oct 2024 13:16:21 -0300 Subject: [PATCH 01/22] DTO for revcheck data. --- scripts/translation/lib/RevcheckData.php | 68 ++++++++++++++++++++ scripts/translation/lib/RevcheckFileInfo.php | 10 --- scripts/translation/lib/all.php | 1 + 3 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 scripts/translation/lib/RevcheckData.php diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php new file mode 100644 index 000000000..89a16d660 --- /dev/null +++ b/scripts/translation/lib/RevcheckData.php @@ -0,0 +1,68 @@ + | + * +----------------------------------------------------------------------+ + * | Description: DTO for serialization of revcheck data. | + * +----------------------------------------------------------------------+ + */ + +// NOTE: This file MAY be used in more of one git repository in future. +// If it is the case, please make note of this in *both* places. 
+ +enum RevcheckStatus : string +{ + case TranslatedOk = 'TranslatedOk'; + case TranslatedOld = 'TranslatedOld'; + case TranslatedWip = 'TranslatedWip'; + case Untranslated = 'Untranslated'; + case RevTagProblem = 'RevTagProblem'; + case NotInEnTree = 'NotInEnTree'; +} + +class RevcheckData +{ + public $translators = array(); // RevcheckDataTranslator + public $fileSummary = array(); // RevcheckStatus, int + public $fileDetail = array(); // RevcheckDataFile + + public function __construct() + { + foreach ( RevcheckStatus::cases() as $status ) + $this->$fileSummary[ $tatus ] = 0; + } +} + +class RevcheckDataTranslator +{ + public string $name; + public string $email; + public string $nick; + + public int $filesUpdate; + public int $filesOld; + public int $filesWip; +} + +class RevcheckDataFile +{ + public string $path; + public string $name; + public int $size; + public int $days; + + public RevcheckStatus $status; + + public string $lastHash; // The most recent commit hash, skipped or not + public string $diffHash; // The most recent, non [skip-revcheck] commit hash +} diff --git a/scripts/translation/lib/RevcheckFileInfo.php b/scripts/translation/lib/RevcheckFileInfo.php index 027c93fd8..5ebc5c46e 100644 --- a/scripts/translation/lib/RevcheckFileInfo.php +++ b/scripts/translation/lib/RevcheckFileInfo.php @@ -19,16 +19,6 @@ require_once __DIR__ . '/all.php'; - enum RevcheckStatus :string -{ - case Untranslated = 'Untranslated'; - case RevTagProblem = 'RevTagProblem'; - case TranslatedOk = 'TranslatedOk'; - case TranslatedOld = 'TranslatedOld'; - case TranslatedWip = 'TranslatedWip'; - case NotInEnTree = 'NotInEnTree'; -} - class RevcheckFileInfo { public string $file = ""; // from fs diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php index aa9ca28eb..71446f767 100644 --- a/scripts/translation/lib/all.php +++ b/scripts/translation/lib/all.php @@ -32,5 +32,6 @@ require_once __DIR__ . '/RevcheckFileList.php'; require_once __DIR__ . 
'/RevcheckIgnore.php'; require_once __DIR__ . '/RevcheckRun.php'; +require_once __DIR__ . '/RevcheckData.php'; require_once __DIR__ . '/RevtagParser.php'; require_once __DIR__ . '/XmlUtil.php'; From d72bb5273577d7ef563fa6caf2d5b25d2bd041e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Sat, 26 Oct 2024 16:51:36 -0300 Subject: [PATCH 02/22] Preparation for revcheck data exporting. --- scripts/translation/lib/RevcheckData.php | 47 +++++++---- scripts/translation/lib/RevcheckRun.php | 100 +++++++++++++++++++---- scripts/translation/lib/all.php | 2 +- 3 files changed, 116 insertions(+), 33 deletions(-) diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index 89a16d660..e151b53bc 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -22,24 +22,42 @@ enum RevcheckStatus : string { - case TranslatedOk = 'TranslatedOk'; - case TranslatedOld = 'TranslatedOld'; - case TranslatedWip = 'TranslatedWip'; - case Untranslated = 'Untranslated'; - case RevTagProblem = 'RevTagProblem'; - case NotInEnTree = 'NotInEnTree'; + case TranslatedOk = 'TranslatedOk'; + case TranslatedOld = 'TranslatedOld'; + case TranslatedWip = 'TranslatedWip'; + case Untranslated = 'Untranslated'; + case RevTagProblem = 'RevTagProblem'; + case NotInEnTree = 'NotInEnTree'; } class RevcheckData { - public $translators = array(); // RevcheckDataTranslator + public $translators = array(); // nick, RevcheckDataTranslator public $fileSummary = array(); // RevcheckStatus, int - public $fileDetail = array(); // RevcheckDataFile + public $fileDetail = array(); // filename, RevcheckDataFile public function __construct() { foreach ( RevcheckStatus::cases() as $status ) - $this->$fileSummary[ $tatus ] = 0; + $this->fileSummary[ $status->value ] = 0; + } + + public function addFile( string $key , RevcheckDataFile $file ) + { + $this->fileSummary[ $file->status->value ]++; + $this->fileDetail[ 
$key ] = $file; + } + + public function getTranslator( string $nick ) + { + $translator = $this->translators[ $nick ] ?? null; + if ( $translator == null ) + { + $translator = new RevcheckDataTranslator(); + $translator->nick = $nick; + $this->translators[ $nick ] = $translator; + } + return $translator; } } @@ -48,10 +66,11 @@ class RevcheckDataTranslator public string $name; public string $email; public string $nick; + public string $vcs; - public int $filesUpdate; - public int $filesOld; - public int $filesWip; + public int $filesUpdate = 0; + public int $filesOld = 0; + public int $filesWip = 0; } class RevcheckDataFile @@ -63,6 +82,6 @@ class RevcheckDataFile public RevcheckStatus $status; - public string $lastHash; // The most recent commit hash, skipped or not - public string $diffHash; // The most recent, non [skip-revcheck] commit hash + public string $hashLast; // The most recent commit hash, skipped or not + public string $hashDiff; // The most recent, non [skip-revcheck] commit hash } diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 7b2cd4e66..e656ec67c 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -27,14 +27,15 @@ class RevcheckRun public RevcheckFileList $sourceFiles; public RevcheckFileList $targetFiles; - // Separated lists public array $filesOk = []; public array $filesOld = []; public array $filesRevtagProblem = []; public array $filesUntranslated = []; public array $filesNotInEn = []; public array $filesWip = []; + public array $qaList = []; + public RevcheckData $revData; function __construct( string $sourceDir , string $targetDir , bool $writeResults = true ) { @@ -54,14 +55,20 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults // match and mix $this->calculateStatus(); + // fs output if ( $writeResults ) + { QaFileInfo::cacheSave( $this->qaList ); + $this->saveRevcheckData(); + } } private function 
calculateStatus() { + $this->revData = new RevcheckData; + // All status are marked in source files, - // except notinen, that are marked on target. + // except NotInEnTree, that are marked on target. foreach( $this->sourceFiles->iterator() as $source ) { @@ -73,6 +80,7 @@ private function calculateStatus() { $source->status = RevcheckStatus::Untranslated; $this->filesUntranslated[] = $source; + $this->addData( $source , null ); continue; } @@ -82,49 +90,51 @@ private function calculateStatus() { $source->status = RevcheckStatus::RevTagProblem; $this->filesRevtagProblem[] = $source; + $this->addData( $source , null ); continue; } - // Translation compares ok from multiple hashs. The head hash or the last non-skiped hash. + // Previous code compares uptodate on multiple hashs. The last hash or the last non-skipped hash. // See https://github.com/php/doc-base/blob/090ff07aa03c3e4ad7320a4ace9ffb6d5ede722f/scripts/revcheck.php#L374 // and https://github.com/php/doc-base/blob/090ff07aa03c3e4ad7320a4ace9ffb6d5ede722f/scripts/revcheck.php#L392 . 
- $sourceHash = $source->head; + $sourceHsh1 = $source->head; + $sourceHsh2 = $source->diff; $targetHash = $target->revtag->revision; - if ( $targetHash == $source->diff ) - $sourceHash = $source->diff; - $daysOld = ( strtotime( "now" ) - $source->date ) / 86400; $daysOld = (int)$daysOld; - $qaInfo = new QaFileInfo( $sourceHash , $targetHash , $this->sourceDir , $this->targetDir , $source->file , $daysOld ); + $qaInfo = new QaFileInfo( $sourceHsh1 , $targetHash , $this->sourceDir , $this->targetDir , $source->file , $daysOld ); $this->qaList[ $source->file ] = $qaInfo; // TranslatedOk - if ( $target->revtag->status == "ready" && $sourceHash == $targetHash ) + if ( $target->revtag->status == "ready" && ( $sourceHsh1 == $targetHash || $sourceHsh2 == $targetHash ) ) { $source->status = RevcheckStatus::TranslatedOk; $this->filesOk[] = $source; + $this->addData( $source , $target->revtag ); continue; } - GitDiffParser::parseNumstatInto( $this->sourceDir , $source ); - + // TranslatedOld // TranslatedWip - if ( $target->revtag->status != "ready" ) + GitDiffParser::parseNumstatInto( $this->sourceDir , $source ); + + if ( $target->revtag->status == "ready" ) + { + $source->status = RevcheckStatus::TranslatedOld; + $this->filesOld[] = $source; + $this->addData( $source , $target->revtag ); + } + else { $source->status = RevcheckStatus::TranslatedWip; $this->filesWip[] = $source; - continue; + $this->addData( $source , $target->revtag ); } - - // TranslatedOld - - $source->status = RevcheckStatus::TranslatedOld; - $this->filesOld[] = $source; } // NotInEnTree @@ -136,7 +146,61 @@ private function calculateStatus() { $target->status = RevcheckStatus::NotInEnTree; $this->filesNotInEn[] = $target; + $this->addData( $target ); + } + } + } + + private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = null ) : void + { + $file = new RevcheckDataFile; + + $file->path = dirname( $info->file ); + $file->name = basename( $info->file ); + $file->size = $info->size; + 
$file->days = floor( ( time() - $info->date ) / 86400 ); + $file->status = $info->status; + $file->hashLast = $info->head; + $file->hashDiff = $info->diff; + + $this->revData->addFile( $info->file , $file ); + + if ( $revtag != null ) + { + $translator = $this->revData->getTranslator( $revtag->maintainer ); + + switch( $info->status ) + { + case RevcheckStatus::TranslatedOk: + $translator->filesUpdate++; + break; + case RevcheckStatus::TranslatedOld: + $translator->filesOld++; + break; + default: + $translator->filesWip++; + break; } } } + + private function parseTranslationXml() : void + { + $xml = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); + $persons = $xml->getElementsByTagName( 'person' ); + + foreach( $persons as $person ) + { + $nick = $person->getAttribute( 'nick' ); + $translator = $this->revData->getTranslator( $nick ); + $translator->name = $person->getAttribute( 'name' ); + $translator->email = $person->getAttribute( 'email' ); + $translator->vcs = $person->getAttribute( 'vcs' ) ?? ""; + } + } + + private function saveRevcheckData() + { + $this->parseTranslationXml(); + } } diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php index 71446f767..5f56bfa4a 100644 --- a/scripts/translation/lib/all.php +++ b/scripts/translation/lib/all.php @@ -28,10 +28,10 @@ require_once __DIR__ . '/OutputIgnoreArgv.php'; require_once __DIR__ . '/OutputIgnoreBuffer.php'; require_once __DIR__ . '/QaFileInfo.php'; +require_once __DIR__ . '/RevcheckData.php'; require_once __DIR__ . '/RevcheckFileInfo.php'; require_once __DIR__ . '/RevcheckFileList.php'; require_once __DIR__ . '/RevcheckIgnore.php'; require_once __DIR__ . '/RevcheckRun.php'; -require_once __DIR__ . '/RevcheckData.php'; require_once __DIR__ . '/RevtagParser.php'; require_once __DIR__ . 
'/XmlUtil.php'; From 72453ceb55f5c6ffc7c290454c23d51fda55ad7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Sat, 26 Oct 2024 20:50:46 -0300 Subject: [PATCH 03/22] Backport loose and fixed [skip-revcheck] modes --- scripts/translation/lib/GitLogParser.php | 55 +++++++--- scripts/translation/lib/RevcheckRun.php | 22 ++++ scripts/translation/lib/all.php | 133 +++++++++++++++++++++++ 3 files changed, 195 insertions(+), 15 deletions(-) diff --git a/scripts/translation/lib/GitLogParser.php b/scripts/translation/lib/GitLogParser.php index 151d18456..65095f90c 100644 --- a/scripts/translation/lib/GitLogParser.php +++ b/scripts/translation/lib/GitLogParser.php @@ -29,6 +29,7 @@ static function parseInto( string $lang , RevcheckFileList & $list ) $hash = ""; $date = ""; $skip = false; + $mcnt = 0; while ( ( $line = fgets( $fp ) ) !== false ) { // new commit block @@ -37,6 +38,7 @@ static function parseInto( string $lang , RevcheckFileList & $list ) $hash = trim( substr( $line , 7 ) ); $date = ""; $skip = false; + $mcnt = 0; continue; } // datetime of commit @@ -46,20 +48,31 @@ static function parseInto( string $lang , RevcheckFileList & $list ) $date = strtotime( $line ); continue; } - // other headers - if ( strpos( $line , ': ' ) > 0 ) - continue; // empty lines if ( trim( $line ) == "" ) continue; // commit message if ( str_starts_with( $line , ' ' ) ) { - // commits with this mark are ignored - if ( stristr( $line, '[skip-revcheck]' ) !== false ) - $skip = true; + if ( LOOSE_SKIP_REVCHECK ) // See below, and https://github.com/php/doc-base/pull/132 + { + // commits with [skip-revcheck] anywhere commit message flags skip + if ( str_contains( $line, '[skip-revcheck]' ) ) + $skip = true; + } + else + { + $mcnt++; + // [skip-revcheck] at start of first line of commit message flags a skip + if ( $mcnt == 1 && str_starts_with( trim( $line ) , '[skip-revcheck]' ) ) + $skip = true; + } continue; } + // other headers + if ( strpos( $line , ': ' ) 
> 0 ) + continue; + // otherwise, a filename $filename = trim( $line ); $info = $list->get( $filename ); @@ -68,22 +81,34 @@ static function parseInto( string $lang , RevcheckFileList & $list ) if ( $info == null ) continue; - // the head commit + // Saves only the first commit hash of a file of git log, + // that is, the last commit hash in chronological order. + if ( $info->head == "" ) { $info->head = $hash; $info->date = $date; - } - // after, only tracks non skipped commits - if ( $skip ) - continue; + if ( FIXED_SKIP_REVCHECK ) + if ( $skip ) + $info->diff = "skip"; + } - // the diff commit - if ( $info->diff == "" ) + if ( !FIXED_SKIP_REVCHECK ) { - $info->diff = $hash; - $info->date = $date; + // Also tracks the first commit hash of a file in git log + // that is *not* market with [skip-revcheck] (the diff hash) + // so it's possible to not bother translations with + // minutiae modifications. + + if ( $skip ) + continue; + + if ( $info->diff == "" ) + { + $info->diff = $hash; + $info->date = $date; + } } } diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index e656ec67c..ce9369dd3 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -125,9 +125,18 @@ private function calculateStatus() if ( $target->revtag->status == "ready" ) { + if ( FIXED_SKIP_REVCHECK && $source->diff == "skip" && TestFixedHashMinusTwo( $source->file , $targetHash ) ) + { + $source->status = RevcheckStatus::TranslatedOk; + $this->filesOk[] = $source; + $this->addData( $source , $target->revtag ); + } + else + { $source->status = RevcheckStatus::TranslatedOld; $this->filesOld[] = $source; $this->addData( $source , $target->revtag ); + } } else { @@ -204,3 +213,16 @@ private function saveRevcheckData() $this->parseTranslationXml(); } } + +function TestFixedHashMinusTwo($filename, $hash) :bool +{ + assert( FIXED_SKIP_REVCHECK ); // if deleted, delete entire funciont. 
+ + // See mentions of FIXED_SKIP_REVCHECK on all.php for an explanation + + $cwd = getcwd(); + chdir( 'en' ); + $hashes = explode ( "\n" , `git log -2 --format=%H -- {$filename}` ); + chdir( $cwd ); + return ( $hashes[1] == $hash ); // $trFile->hash +} \ No newline at end of file diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php index 5f56bfa4a..ecfa07b56 100644 --- a/scripts/translation/lib/all.php +++ b/scripts/translation/lib/all.php @@ -21,6 +21,9 @@ ini_set( 'display_startup_errors' , 1 ); error_reporting( E_ALL ); +const LOOSE_SKIP_REVCHECK = true; // See https://github.com/php/doc-base/pull/132 , issue 1 +const FIXED_SKIP_REVCHECK = true; // See https://github.com/php/doc-base/pull/132 , issue 2 + require_once __DIR__ . '/CacheFile.php'; require_once __DIR__ . '/CacheUtil.php'; require_once __DIR__ . '/GitDiffParser.php'; @@ -35,3 +38,133 @@ require_once __DIR__ . '/RevcheckRun.php'; require_once __DIR__ . '/RevtagParser.php'; require_once __DIR__ . '/XmlUtil.php'; + + + +if ( LOOSE_SKIP_REVCHECK ) { $description = <<<'TEXT' + +Consider the output of: git show f80105b4fc1196bd8d5fecb98d686b580b1ff65d + +``` +commit f80105b4fc1196bd8d5fecb98d686b580b1ff65d + + Remove constant tag from literal values (#3251) + + * Remove constant tag from literal values + + * [skip-revcheck] Fix whitespace + +diff --git a/appendices/filters.xml b/appendices/filters.xml +index 59a4735de1..06e0a7276e 100644 +--- a/appendices/filters.xml ++++ b/appendices/filters.xml +@@ -302,7 +302,7 @@ fclose($fp); + window is the base-2 log of the compression loopback window size. + Higher values (up to 15 -- 32768 bytes) yield better compression at a cost of memory, + while lower values (down to 9 -- 512 bytes) yield worse compression in a smaller memory footprint. +- Default window size is currently 15. ++ Default window size is currently 15. + + memory is a scale indicating how much work memory should be allocated. 
+ Valid values range from 1 (minimal allocation) to 9 (maximum allocation). This memory allocation +diff --git a/install/fpm/configuration.xml b/install/fpm/configuration.xml +index 9baaf43d6f..a34700ef97 100644 +--- a/install/fpm/configuration.xml ++++ b/install/fpm/configuration.xml +@@ -805,109 +805,109 @@ + + + +- %C ++ %C + + %CPU + + +``` + +This commit must be tracked in translations? In other words, this commit +should mark the various files changed as outdated in translations? + +The current implementation on doc-base/revcheck.php would *ignore* this +commit, *not* marking these files as outdated on translations. + +This is because the code searches for '[skip-revcheck]' in any position [1], +and in any lile [2] of commit messages. + +[1] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L304 +[2] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L302 + +The problem, here, is that this commit on doc-en was squashed, and by so, all +individual commit messages are concatenated in one commit message. + +``` + Remove constant tag from literal values (#3251) + * Remove constant tag from literal values + * [skip-revcheck] Fix whitespace +``` + +The solution proposed is to check for '[skip-revcheck]' mark only at the +starting of the first line on commit messages, so future squashed commits +do not cause files modifications being untracked on translations. 
+ +TEXT; } + + + +if ( FIXED_SKIP_REVCHECK ) { $description = <<<'TEXT' + +Consider the output of: git log --oneline -- reference/ds/ds.deque.xml +``` +4d17 [skip-revcheck] Convert class markup to be compatible with DocBook 5.2 +6cec [skip-revcheck] Normalize &Constants; and &Methods; usage (#2703) +b2a2 These should include the namesapce +120c Document ArrayAccess in PHP-DS +``` + +The last two commits, each one, will mark all their included files as old +in translations, as the commit message does not contain '[skip-revcheck]'. + +The commit 6cec, marked [skip-revcheck], will not mark any file as outdated. + +The commit 4d17, marked [skip-revcheck], will mark all its files as outdated, +needing to be updated to 6cec. + +See the difference in behaviour between two individual commits marked +'[skip-revcheck]'? +That 6cec commit is also marked '[skip-revcheck]' is +incidental. This discrepancy occurs in any sequence of commits +marked '[skip-revcheck]'. + +When the revcheck code, as now, detects that the topmost commit hash contains an +'[skip-revcheck]', it ignores this topmost commit hash, and then selects the fixed +'-2' commit hash as a base of comparison, when the file is calculated as old. + +See: +- Oldness test: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L362 +- Topmost skip: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L380 +- Hash -2:      https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L384 + +The output of -2 test, as now, is: +``` +4d17b7b4947e7819ff5036715dd706be87ae4def +6ceccac7860f382f16ac1407baf54f656e85ca0b +``` + +The code linked above splits the results on the new line, and compares the revtag +hash against the second line, 6cec in this case. But 6cec is itself marked as an +'[skip-revcheck]'. 
So an [skip-revcheck] is bumping all its file hashes into +another [skip-revcheck] commit hash... + +The proposed solution is to removing the use of the fixed -2 topmost hash when +the topmost hash is marked [skip-revcheck] into ignoring any topmost commit +hash marked [skip-revcheck], and thus selecting as an alternative comparison +hash the first topmost hash not marked as [skip-revcheck]. + +In this case, b2a2. + +So any future sequence of [skip-revcheck] commits does not cause the bumping +of hashes in all translations in the presence of a sequence of [skip-revcheck] +commits. + +TEXT; } \ No newline at end of file From 600714c933947690d1a9a0c98474b3cb2b24b0d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Fri, 1 Nov 2024 14:36:42 -0300 Subject: [PATCH 04/22] Additional revcheck data. --- scripts/translation/lib/RevcheckData.php | 9 ++++++--- scripts/translation/lib/RevcheckRun.php | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index e151b53bc..2a0bf846f 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -32,9 +32,12 @@ enum RevcheckStatus : string class RevcheckData { - public $translators = array(); // nick, RevcheckDataTranslator - public $fileSummary = array(); // RevcheckStatus, int - public $fileDetail = array(); // filename, RevcheckDataFile + public string $lang = ""; + public string $date = ""; + public string $intro = ""; + public $translators = array(); // nick => RevcheckDataTranslator + public $fileSummary = array(); // RevcheckStatus => int + public $fileDetail = array(); // filename => RevcheckDataFile public function __construct() { diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index ce9369dd3..ea4bad5e3 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ 
b/scripts/translation/lib/RevcheckRun.php @@ -37,7 +37,7 @@ class RevcheckRun public array $qaList = []; public RevcheckData $revData; - function __construct( string $sourceDir , string $targetDir , bool $writeResults = true ) + function __construct( string $sourceDir , string $targetDir , bool $writeResults = false ) { $this->sourceDir = $sourceDir; $this->targetDir = $targetDir; @@ -66,6 +66,8 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults private function calculateStatus() { $this->revData = new RevcheckData; + $this->revData->lang = $this->targetDir; + $this->revData->date = date("r"); // All status are marked in source files, // except NotInEnTree, that are marked on target. @@ -133,9 +135,9 @@ private function calculateStatus() } else { - $source->status = RevcheckStatus::TranslatedOld; - $this->filesOld[] = $source; - $this->addData( $source , $target->revtag ); + $source->status = RevcheckStatus::TranslatedOld; + $this->filesOld[] = $source; + $this->addData( $source , $target->revtag ); } } else @@ -196,8 +198,12 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul private function parseTranslationXml() : void { $xml = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); - $persons = $xml->getElementsByTagName( 'person' ); + $this->revData->intro = + $xml->getElementsByTagName( 'intro' )[0]->textContent + ?? "No intro available for the {$lang} translation of the manual."; + + $persons = $xml->getElementsByTagName( 'person' ); foreach( $persons as $person ) { $nick = $person->getAttribute( 'nick' ); @@ -211,6 +217,8 @@ private function parseTranslationXml() : void private function saveRevcheckData() { $this->parseTranslationXml(); + $json = json_encode( $this->revData , JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT ); + file_put_contents( __DIR__ . 
"/../../../.revcheck.json" , $json ); } } From 221e0dcad476fc9444d6b2d7705928de091e27b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Fri, 1 Nov 2024 14:40:53 -0300 Subject: [PATCH 05/22] Revcheck deduplication: clean up, header. --- .gitignore | 1 + scripts/revcheck.php | 456 ++----------------------------------------- 2 files changed, 21 insertions(+), 436 deletions(-) diff --git a/.gitignore b/.gitignore index 528fe4b73..afc9e7141 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Files generated by the configure script .manual.xml +.revcheck.json install-unix.xml install-win.xml manual.xml diff --git a/scripts/revcheck.php b/scripts/revcheck.php index aba286215..02eab209c 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -20,6 +20,8 @@ +----------------------------------------------------------------------+ */ +require_once __DIR__ . '/translation/lib/all.php'; + if ( $argc != 2 ) { print << Lang -$root = getcwd(); $lang = $argv[1]; +fwrite( STDERR , "TODO\n" ); // FAST +//$data = new RevcheckRun( 'en' , $argv[1] ); +$data = unserialize( file_get_contents ( "FAST" ) ); +$data = $data->revData; -$gitData = []; // filename lang hash - -$intro = "No intro available for the {$lang} translation of the manual."; - -$oldfiles = []; //path, name, size - -$enFiles = populateFileTree( 'en' ); -$trFiles = populateFileTree( $lang ); -captureGitValues( 'en' , $gitData ); - -computeSyncStatus( $enFiles , $trFiles , $gitData , $lang ); -$translators = computeTranslatorStatus( $lang, $enFiles, $trFiles ); - -print_html_all( $enFiles , $trFiles , $translators, $lang ); - -// Model -class OldFilesInfo -{ - public $path; - public $name; - public $size; - - public function getKey() - { - return trim( $this->path . '/' . 
$this->name , '/' ); - } -} - -class FileStatusEnum -{ - const Untranslated = 'Untranslated'; - const RevTagProblem = 'RevTagProblem'; - const TranslatedWip = 'TranslatedWip'; - const TranslatedOk = 'TranslatedOk'; - const TranslatedOld = 'TranslatedOld'; - const NotInEnTree = 'NotInEnTree'; -} - -class FileStatusInfo -{ - public $path; - public $name; - public $size; - public $hash; - public $skip; - public $days; - public $adds; - public $dels; - public $syncStatus; - public $maintainer; - public $completion; - public $credits; - - public function getKey() - { - return trim( $this->path . '/' . $this->name , '/' ); - } -} - -class TranslatorInfo -{ - public $name; - public $email; - public $nick; - public $vcs; - - public $files_uptodate; - public $files_outdated; - public $files_wip; - public $files_sum; - public $files_other; - - public function __construct() { - $this->files_uptodate = 0; - $this->files_outdated = 0; - $this->files_wip = 0; - $this->files_sum = 0; - $this->files_other = 0; - } - - public static function getKey( $fileStatus ) { - switch ( $fileStatus ) { - case FileStatusEnum::RevTagProblem: - case FileStatusEnum::TranslatedOld: - return "files_outdated"; - break; - case FileStatusEnum::TranslatedWip: - return "files_wip"; - break; - case FileStatusEnum::TranslatedOk: - return "files_uptodate"; - break; - default: - return "files_other"; - } - } -} - -function populateFileTree( $lang ) -{ - $dir = new \DirectoryIterator( $lang ); - if ( $dir === false ) - { - print "$lang is not a directory.\n"; - exit; - } - $cwd = getcwd(); - $ret = array(); - chdir( $lang ); - populateFileTreeRecurse( $lang , "." 
, $ret ); - chdir( $cwd ); - return $ret; -} - -function populateFileTreeRecurse( $lang , $path , & $output ) -{ - global $oldfiles; - $dir = new DirectoryIterator( $path ); - if ( $dir === false ) - { - print "$path is not a directory.\n"; - exit; - } - $todoPaths = []; - $trimPath = ltrim( $path , "./"); - foreach( $dir as $entry ) - { - $filename = $entry->getFilename(); - if ( $filename[0] == '.' ) - continue; - if ( substr( $filename , 0 , 9 ) == "entities." ) - continue; - if ( $entry->isDir() ) - { - $todoPaths[] = $path . '/' . $entry->getFilename(); - continue; - } - if ( $entry->isFile() ) - { - $ignoredFileNames = [ - 'README.md', - 'translation.xml', - 'readme.first', - 'license.xml', - 'extensions.xml', - 'versions.xml', - 'book.developer.xml', - 'contributors.ent', - 'contributors.xml', - 'README', - 'DO_NOT_TRANSLATE', - 'rsusi.txt', - 'missing-ids.xml', - ]; - - $ignoredDirectories = [ - 'chmonly', - ]; - - $ignoredFullPaths = [ - 'appendices/reserved.constants.xml', - 'appendices/extensions.xml', - 'reference/datetime/timezones.xml', - ]; - - if( - in_array($trimPath, $ignoredDirectories, true) - || in_array($filename, $ignoredFileNames, true) - || (strpos($filename, 'entities.') === 0) - || !in_array(substr($filename, -3), ['xml','ent'], true) - || (substr($filename, -13) === 'PHPEditBackup') - || (in_array($trimPath . '/' .$filename, $ignoredFullPaths, true)) - ) { - continue; - } - $file = new FileStatusInfo; - $file->path = $trimPath; - $file->name = $filename; - $file->size = filesize( $path . '/' . $filename ); - $file->syncStatus = null; - if ( $lang != 'en' ) - { - parseRevisionTag( $entry->getPathname() , $file ); - $path_en = '../en/' . $trimPath . '/' . $filename; - if( !is_file($path_en) ) //notinen - { - $oldfile = new OldFilesInfo; - $oldfile->path = $trimPath; - $oldfile->name = $filename; - $oldfile->size = $file->size < 1024 ? 
1 : floor( $file->size / 1024 ); - $oldfiles[ $oldfile->getKey() ] = $oldfile; - } else { - $output[ $file->getKey() ] = $file; - } - } else { - $output[ $file->getKey() ] = $file; - } - } - } - sort( $todoPaths ); - foreach( $todoPaths as $path ) - populateFileTreeRecurse( $lang , $path , $output ); -} - -function parseRevisionTag( $filename , FileStatusInfo $file ) -{ - $fp = fopen( $filename , "r" ); - $contents = fread( $fp , 1024 ); - fclose( $fp ); - - // No match before the preg - $match = array (); - - $regex = "''U"; - if (preg_match ($regex , $contents , $match )) { - $file->hash = trim( $match[1] ); - $file->maintainer = trim( $match[2] ); - $file->completion = trim( $match[3] ); - } - if ( $file->hash == null or strlen( $file->hash ) != 40 or - $file->maintainer == null or - $file->completion == null ) - $file->syncStatus = FileStatusEnum::RevTagProblem; - - $regex = "//U"; - $match = array(); - preg_match ( $regex , $contents , $match ); - if ( count( $match ) == 2 ) - $file->credits = str_replace( ' ' , '' , trim( $match[1] ) ); - else - $file->credits = ''; -} - -function captureGitValues( $lang , & $output ) -{ - $cwd = getcwd(); - chdir( $lang ); - $fp = popen( "git --no-pager log --name-only" , "r" ); - $hash = $additions = $deletions = $filename = null; - $skip = false; - while ( ( $line = fgets( $fp ) ) !== false ) - { - if ( substr( $line , 0 , 7 ) == "commit " ) - { - $hash = trim( substr( $line , 7 ) ); - $skip = false; - continue; - } - if ( strpos( $line , 'Date:' ) === 0 ) - continue; - if ( trim( $line ) == "" ) - continue; - if ( substr( $line , 0 , 4 ) == ' ' ) - { - if ( stristr( $line, '[skip-revcheck]' ) !== false ) - $skip = true; - continue; - } - if ( strpos( $line , ': ' ) > 0 ) - continue; - $filename = trim( $line ); - if ( isset( $output[$filename][$lang] ) ) - continue; - - $output[$filename][$lang]['hash'] = $hash; - $output[$filename][$lang]['skip'] = $skip; - } - pclose( $fp ); - chdir( $cwd ); -} - -function 
computeSyncStatus( $enFiles , $trFiles , $gitData , $lang ) -{ - foreach( $trFiles as $filename => $trFile ) - { - // notinen - $path_en = 'en/' . $trFile->path . '/' . $trFile->name; - if( !is_file($path_en) ) - { - $trFile->syncStatus = FileStatusEnum::NotInEnTree; - continue; - } - - } - foreach( $enFiles as $filename => $enFile ) - { - if ( isset( $gitData[ $filename ]['en'] ) ) - { - $enFile->hash = $gitData[ $filename ]['en']['hash']; - $enFile->skip = $gitData[ $filename ]['en']['skip']; - } - else - print "Warn: No hash for en/$filename
"; - - $trFile = isset( $trFiles[ $filename ] ) ? $trFiles[ $filename ] : null; - - if ( $trFile == null ) // Untranslated - { - $enFile->syncStatus = FileStatusEnum::Untranslated; - continue; - } - if ( $trFile->syncStatus == FileStatusEnum::RevTagProblem ) - continue; - - // TranslatedOk - // TranslatedOld - if ( strlen( $trFile->hash ) == 40 ) - { - if ( $enFile->hash == $trFile->hash ) - $trFile->syncStatus = FileStatusEnum::TranslatedOk; - else - { - $trFile->syncStatus = FileStatusEnum::TranslatedOld; - - $cwd = getcwd(); - chdir( 'en' ); - //adds,dels - $subject = `git diff --numstat $trFile->hash -- {$filename}`; - if ( $subject ) - { - preg_match('/(\d+)\s+(\d+)/', $subject, $matches); - if ($matches) - [, $enFile->adds, $enFile->dels] = $matches; - } - //days - $days = `git show --no-patch --format='%ct' $enFile->hash -- {$filename}`; - if ( $days != "" ) - $enFile->days = floor( ( time() - $days ) / 86400 ); - chdir( $cwd ); - - if ( $enFile->skip ) - { - $cwd = getcwd(); - chdir( 'en' ); - $hashes = explode ( "\n" , `git log -2 --format=%H -- {$filename}` ); - chdir( $cwd ); - if ( $hashes[1] == $trFile->hash ) - $trFile->syncStatus = FileStatusEnum::TranslatedOk; - } - } - } - // TranslatedWip - if ( $trFile->completion != null && $trFile->completion != "ready" ) - $trFile->syncStatus = FileStatusEnum::TranslatedWip; - } -} - -function parse_attr_string ( $tags_attrs ) { - $tag_attrs_processed = array(); - - foreach($tags_attrs as $attrib_list) { - preg_match_all("!(.+)=\\s*([\"'])\\s*(.+)\\2!U", $attrib_list, $attribs); - - $attrib_array = array(); - foreach ($attribs[1] as $num => $attrname) { - $attrib_array[trim($attrname)] = trim($attribs[3][$num]); - } - - $tag_attrs_processed[] = $attrib_array; - } - - return $tag_attrs_processed; -} - -function computeTranslatorStatus( $lang, $enFiles, $trFiles ) { - global $intro; - $translation_xml = getcwd() . "/" . $lang . 
"/translation.xml"; - if (!file_exists($translation_xml)) { - return []; - } - - $txml = join("", file($translation_xml)); - $txml = preg_replace("/\\s+/", " ", $txml); - - preg_match("!(.+)!s", $txml, $match); - $intro = trim($match[1]); - - preg_match("!<\?xml(.+)\?>!U", $txml, $match); - $xmlinfo = parse_attr_string($match); - $output_charset = $xmlinfo[1]["encoding"]; - - $pattern = "!!U"; - preg_match_all($pattern, $txml, $matches); - $translators = parse_attr_string($matches[1]); - - $translatorInfos = []; - $unknownInfo = new TranslatorInfo(); - $unknownInfo->nick = "unknown"; - $translatorInfos["unknown"] = $unknownInfo; - - foreach ($translators as $key => $translator) { - $info = new TranslatorInfo(); - $info->name = $translator["name"]; - $info->email = $translator["email"]; - $info->nick = $translator["nick"]; - $info->vcs = $translator["vcs"] ?? ""; - - $translatorInfos[$info->nick] = $info; - } - - foreach( $enFiles as $key => $enFile ) { - $info_exists = false; - if (array_key_exists($enFile->getKey(), $trFiles)) { - $trFile = $trFiles[$enFile->getKey()]; - $statusKey = TranslatorInfo::getKey($trFile->syncStatus); - if (array_key_exists($trFile->maintainer, $translatorInfos)) { - $translatorInfos[$trFile->maintainer]->$statusKey++; - $translatorInfos[$trFile->maintainer]->files_sum++; - $info_exists = true; - } - } - if (!$info_exists) { - $translatorInfos["unknown"]->$statusKey++; - $translatorInfos["unknown"]->files_sum++; - } - } - - return $translatorInfos; -} +print_html_all( $data ); // Output -function print_html_all( $enFiles , $trFiles , $translators , $lang ) +function print_html_all( $data ) { - print_html_header( $lang ); - print_html_translators($translators , $enFiles, $trFiles); - print_html_files( $enFiles , $trFiles , $lang ); - print_html_notinen(); - print_html_misstags( $enFiles, $trFiles, $lang ); - print_html_untranslated( $enFiles ); - print_html_footer(); + print_html_header( $data ); + //print_html_translators($translators , 
$enFiles, $trFiles); + //print_html_files( $enFiles , $trFiles , $lang ); + //print_html_notinen(); + //print_html_misstags( $enFiles, $trFiles, $lang ); + //print_html_untranslated( $enFiles ); + //print_html_footer(); } -function print_html_header( $lang ) +function print_html_header( $data ) { - $date = date("r"); + $lang = $data->lang; + $date = $data->date; print << @@ -512,15 +99,12 @@ function print_html_header( $lang )

Status of the translated PHP Manual

Generated: $date / Language: $lang

- HTML; } - function print_html_menu($href) { print <<

Introduction | Translators From 83fb6515adf6be2e34dc8c29a435e47bf4fd45a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Fri, 1 Nov 2024 16:12:53 -0300 Subject: [PATCH 06/22] Revcheck deduplication: translators, file summary. --- scripts/revcheck.php | 158 +++++++++-------------- scripts/translation/lib/RevcheckData.php | 10 +- scripts/translation/lib/RevcheckRun.php | 15 ++- 3 files changed, 77 insertions(+), 106 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index 02eab209c..f5dcbe50b 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -48,6 +48,13 @@ $lang = $argv[1]; fwrite( STDERR , "TODO\n" ); // FAST //$data = new RevcheckRun( 'en' , $argv[1] ); + +fwrite( STDERR , "TODO\n" ); // FAST +if ( ! file_exists( "FAST" ) ) +{ + $data = new RevcheckRun( 'en' , $argv[1] ); + file_put_contents( "FAST" , serialize( $data ) ); +} $data = unserialize( file_get_contents ( "FAST" ) ); $data = $data->revData; @@ -58,7 +65,7 @@ function print_html_all( $data ) { print_html_header( $data ); - //print_html_translators($translators , $enFiles, $trFiles); + print_html_translators( $data ); //print_html_files( $enFiles , $trFiles , $lang ); //print_html_notinen(); //print_html_misstags( $enFiles, $trFiles, $lang ); @@ -117,14 +124,16 @@ function print_html_menu($href) HTML; } -function print_html_translators( $translators , $enFiles, $trFiles ) +function print_html_translators( $data ) { - global $intro, $oldfiles, $files_misstags, $notinen_count, $files_untranslated; - if (count($translators) === 0) return; + $translators = $data->translators; + if ( count( $translators ) == 0 ) + return; + print_html_menu("intro"); print << - $intro + {$data->intro}

@@ -136,83 +145,44 @@ function print_html_translators( $translators , $enFiles, $trFiles ) - + HTML; - $files_uptodate = 0; - $files_outdated = 0; - $files_wip = 0; - $files_sum = 0; - foreach( $translators as $key => $person ) + $totalOk = 0; + $totalOld = 0; + $totalWip = 0; + + foreach( $translators as $person ) { - if ($person->nick === "unknown") continue; + // Unknown or untracked on translations.xml + if ( $person->name == "" && $person->email == "" && $person->vcs == "" ) + continue; - $files_uptodate += $person->files_uptodate; - $files_outdated += $person->files_outdated; - $files_wip += $person->files_wip; - $files_sum += $person->files_sum; - print <<filesUpdate; + $totalOld += $person->filesOld; + $totalWip += $person->filesWip; + + $personSum = $person->filesUpdate + $person->filesOld + $person->filesWip; + print << - - - - - + + + + - HTML; - } print "
Files maintained
upto-
date
upd old wip sum
{$person->name} {$person->email} {$person->nick} {$person->vcs}{$person->files_uptodate}{$person->files_outdated}{$person->files_wip}{$person->files_sum}{$person->filesUpdate}{$person->filesOld}{$person->filesWip}{$personSum}
\n"; -//FILE SUMMARY - $count = 0; - $files_outdated = 0; - $files_sum = 0; - $files_uptodate = 0; - $files_misstags = 0; - $files_wip = 0; - foreach( $trFiles as $key => $tr ) - { - if ( $tr->syncStatus == FileStatusEnum::TranslatedOld ) - $files_outdated++; - if ( $tr->syncStatus == FileStatusEnum::TranslatedOk ) - $files_uptodate++; - if ( $tr->syncStatus == FileStatusEnum::RevTagProblem ) - $files_misstags++; - if ( $tr->syncStatus == FileStatusEnum::TranslatedWip ) - $files_wip++; - } - $files_untranslated = 0; - foreach( $enFiles as $key => $en ) - { - if ( $en->syncStatus == FileStatusEnum::Untranslated ) { - $files_untranslated++; - } - $count++; - } - $notinen_count = 0; - foreach( $oldfiles as $key => $en ) - { - if ( $key == "{$en->path}/{$en->name}" ) { - $notinen_count++; - } - } - $files_uptodate_percent = number_format($files_uptodate * 100 / $count, 2 ); - $files_outdated_percent = number_format($files_outdated * 100 / $count, 2 ); - $files_wip_percent = number_format($files_wip * 100 / $count, 2 ); - $files_untranslated_percent = number_format($files_untranslated * 100 / $count, 2 ); - $notinen_count_percent = number_format($notinen_count * 100 / $count, 2 ); - $files_misstags_percent = number_format($files_misstags * 100 / $count, 2 ); print_html_menu("filesummary"); print << @@ -221,41 +191,41 @@ function print_html_translators( $translators , $enFiles, $trFiles ) Number of files Percent of files +HTML; + + $filesTotal = 0; + foreach ( $data->fileSummary as $count ) + $filesTotal += $count; + + foreach( RevcheckStatus::cases() as $key ) + { + $label = ""; + $count = $data->fileSummary[ $key->value ]; + $perc = number_format( $count / $filesTotal * 100 , 2 ) . 
"%"; + switch( $key ) + { + case RevcheckStatus::TranslatedOk: $label = "Up to date files"; break; + case RevcheckStatus::TranslatedOld: $label = "Outdated files"; break; + case RevcheckStatus::TranslatedWip: $label = "Work in progress"; break; + case RevcheckStatus::RevTagProblem: $label = "Revision tag missing/problem"; break; + case RevcheckStatus::NotInEnTree: $label = "Not in EN tree"; break; + case RevcheckStatus::Untranslated: $label = "Available for translation"; break; + } + + print << - Up to date files - $files_uptodate - $files_uptodate_percent% - - - Outdated files - $files_outdated - $files_outdated_percent% - - - Work in progress - $files_wip - $files_wip_percent% - - - Files without revision number - $files_misstags - $files_misstags_percent% - - - Not in EN tree - $notinen_count - $notinen_count_percent% + $label + $count + $perc +HTML; + } + print << - Files available for translation - $files_untranslated - $files_untranslated_percent% + $label + $count + $perc - - Files total - $count - 100% -

HTML; } diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index 2a0bf846f..f2d1af324 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -25,9 +25,9 @@ enum RevcheckStatus : string case TranslatedOk = 'TranslatedOk'; case TranslatedOld = 'TranslatedOld'; case TranslatedWip = 'TranslatedWip'; - case Untranslated = 'Untranslated'; case RevTagProblem = 'RevTagProblem'; case NotInEnTree = 'NotInEnTree'; + case Untranslated = 'Untranslated'; } class RevcheckData @@ -66,10 +66,10 @@ public function getTranslator( string $nick ) class RevcheckDataTranslator { - public string $name; - public string $email; - public string $nick; - public string $vcs; + public string $name = ""; + public string $email = ""; + public string $nick = ""; + public string $vcs = ""; public int $filesUpdate = 0; public int $filesOld = 0; diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index ea4bad5e3..daa4adeaf 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -53,6 +53,7 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults RevtagParser::parseInto( $targetDir , $this->targetFiles ); // match and mix + $this->parseTranslationXml(); $this->calculateStatus(); // fs output @@ -65,12 +66,9 @@ function __construct( string $sourceDir , string $targetDir , bool $writeResults private function calculateStatus() { - $this->revData = new RevcheckData; - $this->revData->lang = $this->targetDir; - $this->revData->date = date("r"); - - // All status are marked in source files, - // except NotInEnTree, that are marked on target. + // Most of status are marked $sourceFiles, + // except NotInEnTree, that are marked on $targetFiles. 
+ // $revData contains all status foreach( $this->sourceFiles->iterator() as $source ) { @@ -197,6 +195,10 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul private function parseTranslationXml() : void { + $this->revData = new RevcheckData; + $this->revData->lang = $this->targetDir; + $this->revData->date = date("r"); + $xml = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); $this->revData->intro = @@ -216,7 +218,6 @@ private function parseTranslationXml() : void private function saveRevcheckData() { - $this->parseTranslationXml(); $json = json_encode( $this->revData , JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT ); file_put_contents( __DIR__ . "/../../../.revcheck.json" , $json ); } From 3b62b38d05777b236102fbfdb31137de40618fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Tue, 5 Nov 2024 12:31:33 -0300 Subject: [PATCH 07/22] Revcheck deduplication: old/wip files. --- scripts/revcheck.php | 200 ++++++++++++----------- scripts/translation/lib/RevcheckData.php | 9 +- scripts/translation/lib/RevcheckRun.php | 4 + 3 files changed, 114 insertions(+), 99 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index f5dcbe50b..fb6d6121e 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -62,18 +62,18 @@ // Output -function print_html_all( $data ) +function print_html_all( RevcheckData $data ) { print_html_header( $data ); print_html_translators( $data ); - //print_html_files( $enFiles , $trFiles , $lang ); + print_html_oldwip( $data ); //print_html_notinen(); //print_html_misstags( $enFiles, $trFiles, $lang ); //print_html_untranslated( $enFiles ); //print_html_footer(); } -function print_html_header( $data ) +function print_html_header( RevcheckData $data ) { $lang = $data->lang; $date = $data->date; @@ -109,7 +109,7 @@ function print_html_header( $data ) HTML; } -function print_html_menu($href) +function print_html_menu( string $href ) { print << @@ -124,7 +124,7 @@ 
function print_html_menu($href) HTML; } -function print_html_translators( $data ) +function print_html_translators( RevcheckData $data ) { $translators = $data->translators; if ( count( $translators ) == 0 ) @@ -152,20 +152,12 @@ function print_html_translators( $data ) HTML; - $totalOk = 0; - $totalOld = 0; - $totalWip = 0; - foreach( $translators as $person ) { // Unknown or untracked on translations.xml if ( $person->name == "" && $person->email == "" && $person->vcs == "" ) continue; - $totalOk += $person->filesUpdate; - $totalOld += $person->filesOld; - $totalWip += $person->filesWip; - $personSum = $person->filesUpdate + $person->filesOld + $person->filesWip; print << HTML; } - print << - $label - $count - $perc + Files total + $filesTotal + 100% + +HTML; +} + +function print_html_oldwip( RevcheckData $data ) +{ + print_html_menu("files"); + + $total = $data->fileSummary[ RevcheckStatus::TranslatedOld->value ]; + $total += $data->fileSummary[ RevcheckStatus::TranslatedWip->value ]; + $total += $data->fileSummary[ RevcheckStatus::RevTagProblem->value ]; + if ( $total == 0 ) + { + print "

Hooray! There is no files to update, nice work!

\n\n"; + return; + } + + print << + + Translated file + Changes + Hash + Maintainer + Status + Days + + + en + {$data->lang}} + \n +HTML; + + $now = new DateTime( 'now' ); + $path = null; + asort( $data->fileDetail ); + + foreach( $data->fileDetail as $key => $file ) + { + switch ( $file->status ) + { + case RevcheckStatus::TranslatedOld: + case RevcheckStatus::TranslatedWip: + case RevcheckStatus::RevTagProblem: + break; + default: + continue 2; + } + + if ( $path !== $file->path ) + { + $path = $file->path; + $path2 = $path == '' ? '/' : $path; + print " $path2"; + } + + $ma = $file->maintainer; + $st = $file->completion; + $ll = strtolower( $data->lang ); + $kh = hash( 'sha256' , $key ); + $d1 = "https://doc.php.net/revcheck.php?p=plain&lang={$ll}&hbp={$file->hashRvtg}&f=$key"; + $d2 = "https://doc.php.net/revcheck.php?p=plain&lang={$ll}&hbp={$file->hashRvtg}&f=$key&c=on"; + + $nm = "{$file->name} [colored]"; + if ( $file->status == RevcheckStatus::RevTagProblem ) // $file->hashRvtg empty or invalid + $nm = $file->name; + $h1 = "{$file->hashLast}"; + $h2 = "{$file->hashRvtg}"; + + if ( $file->adds > 0 || $file->dels > 0 ) + $ch = "+{$file->adds} -{$file->dels}"; + else + $ch = ""; + + $bgdays = ''; + if ( $file->days > 90 ) + $bgdays = 'bgorange'; + + print << + $nm + $ch + + $h1 + + $h2 + $ma + $st + {$file->days} + \n HTML; + } + + print "

\n\n"; } function print_html_misstags( $enFiles, $trFiles, $lang ) @@ -330,86 +416,6 @@ function print_html_footer() HTML; } - -function print_html_files( $enFiles , $trFiles , $lang ) -{ - print_html_menu("files"); - print << - - Translated file - Changes - Hash - Maintainer - Status - Days - - - en - $lang - - -HTML; - - $now = new DateTime( 'now' ); - $path = null; - asort($trFiles); - foreach( $trFiles as $key => $tr ) - { - if ( $tr->syncStatus == FileStatusEnum::TranslatedOk ) - continue; - if ( $tr->syncStatus == FileStatusEnum::RevTagProblem ) - continue; - if ( $tr->syncStatus == FileStatusEnum::NotInEnTree ) - continue; - $en = $enFiles[ $key ]; - if ( $en->syncStatus == FileStatusEnum::Untranslated ) - continue; - - if ( $path !== $en->path ) - { - $path = $en->path; - $path2 = $path == '' ? '/' : $path; - print " $path2"; - } - $ll = strtolower( $lang ); - $kh = hash( 'sha256' , $key ); - $d1 = "https://doc.php.net/revcheck.php?p=plain&lang={$ll}&hbp={$tr->hash}&f=$key&c=on"; - $d2 = "https://doc.php.net/revcheck.php?p=plain&lang={$ll}&hbp={$tr->hash}&f=$key&c=off"; - $nm = "{$en->name} [colored]"; - if ( $en->syncStatus == FileStatusEnum::RevTagProblem ) - $nm = $en->name; - $h1 = "{$en->hash}"; - $h2 = "{$tr->hash}"; - - $bgdays = ''; - if ($en->days != null && $en->days > 90) - $bgdays = 'bgorange'; - - if ($en->adds != null) - $ch = "+{$en->adds} -{$en->dels}"; - else - $ch = "no data"; - - $ma = $tr->maintainer; - $st = $tr->completion; - print << - $nm - $ch - - $h1 - - $h2 - $ma - $st - {$en->days} - -HTML; - } -print "

\n"; -} - function print_html_notinen() { global $oldfiles, $notinen_count; diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index f2d1af324..6a775c840 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -82,9 +82,14 @@ class RevcheckDataFile public string $name; public int $size; public int $days; + public int $adds = 0; + public int $dels = 0; public RevcheckStatus $status; + public string $maintainer = ""; + public string $completion = ""; - public string $hashLast; // The most recent commit hash, skipped or not - public string $hashDiff; // The most recent, non [skip-revcheck] commit hash + public string $hashLast; // The most recent commit hash, skipped or not + public string $hashDiff; // The most recent, non [skip-revcheck] commit hash + public string $hashRvtg = ""; // Revtag hash, if any } diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index daa4adeaf..3c5bda9db 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -176,6 +176,10 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul if ( $revtag != null ) { + $file->hashRvtg = $revtag->revision; + $file->maintainer = $revtag->maintainer; + $file->completion = $revtag->status; + $translator = $this->revData->getTranslator( $revtag->maintainer ); switch( $info->status ) From 156b0b5960bdf6479ccadfece8f080b79aec4600 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Tue, 5 Nov 2024 14:21:07 -0300 Subject: [PATCH 08/22] Move backport code and docs to a separate file. 
--- scripts/translation/lib/all.php | 136 +--------------------- scripts/translation/lib/backport.php | 168 +++++++++++++++++++++++++++ 2 files changed, 170 insertions(+), 134 deletions(-) create mode 100644 scripts/translation/lib/backport.php diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php index ecfa07b56..e404a9a83 100644 --- a/scripts/translation/lib/all.php +++ b/scripts/translation/lib/all.php @@ -21,9 +21,7 @@ ini_set( 'display_startup_errors' , 1 ); error_reporting( E_ALL ); -const LOOSE_SKIP_REVCHECK = true; // See https://github.com/php/doc-base/pull/132 , issue 1 -const FIXED_SKIP_REVCHECK = true; // See https://github.com/php/doc-base/pull/132 , issue 2 - +require_once __DIR__ . '/backport.php'; require_once __DIR__ . '/CacheFile.php'; require_once __DIR__ . '/CacheUtil.php'; require_once __DIR__ . '/GitDiffParser.php'; @@ -37,134 +35,4 @@ require_once __DIR__ . '/RevcheckIgnore.php'; require_once __DIR__ . '/RevcheckRun.php'; require_once __DIR__ . '/RevtagParser.php'; -require_once __DIR__ . '/XmlUtil.php'; - - - -if ( LOOSE_SKIP_REVCHECK ) { $description = <<<'TEXT' - -Consider the output of: git show f80105b4fc1196bd8d5fecb98d686b580b1ff65d - -``` -commit f80105b4fc1196bd8d5fecb98d686b580b1ff65d - - Remove constant tag from literal values (#3251) - - * Remove constant tag from literal values - - * [skip-revcheck] Fix whitespace - -diff --git a/appendices/filters.xml b/appendices/filters.xml -index 59a4735de1..06e0a7276e 100644 ---- a/appendices/filters.xml -+++ b/appendices/filters.xml -@@ -302,7 +302,7 @@ fclose($fp); - window is the base-2 log of the compression loopback window size. - Higher values (up to 15 -- 32768 bytes) yield better compression at a cost of memory, - while lower values (down to 9 -- 512 bytes) yield worse compression in a smaller memory footprint. -- Default window size is currently 15. -+ Default window size is currently 15. 
- - memory is a scale indicating how much work memory should be allocated. - Valid values range from 1 (minimal allocation) to 9 (maximum allocation). This memory allocation -diff --git a/install/fpm/configuration.xml b/install/fpm/configuration.xml -index 9baaf43d6f..a34700ef97 100644 ---- a/install/fpm/configuration.xml -+++ b/install/fpm/configuration.xml -@@ -805,109 +805,109 @@ - - - -- %C -+ %C - - %CPU - - -``` - -This commit must be tracked in translations? In other words, this commit -should mark the various files changed as outdated in translations? - -The current implementation on doc-base/revcheck.php would *ignore* this -commit, *not* marking these files as outdated on translations. - -This is because the code searches for '[skip-revcheck]' in any position [1], -and in any lile [2] of commit messages. - -[1] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L304 -[2] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L302 - -The problem, here, is that this commit on doc-en was squashed, and by so, all -individual commit messages are concatenated in one commit message. - -``` - Remove constant tag from literal values (#3251) - * Remove constant tag from literal values - * [skip-revcheck] Fix whitespace -``` - -The solution proposed is to check for '[skip-revcheck]' mark only at the -starting of the first line on commit messages, so future squashed commits -do not cause files modifications being untracked on translations. 
- -TEXT; } - - - -if ( FIXED_SKIP_REVCHECK ) { $description = <<<'TEXT' - -Consider the output of: git log --oneline -- reference/ds/ds.deque.xml -``` -4d17 [skip-revcheck] Convert class markup to be compatible with DocBook 5.2 -6cec [skip-revcheck] Normalize &Constants; and &Methods; usage (#2703) -b2a2 These should include the namesapce -120c Document ArrayAccess in PHP-DS -``` - -The last two commits, each one, will mark all their included files as old -in translations, as the commit message does not contain '[skip-revcheck]'. - -The commit 6cec, marked [skip-revcheck], will not mark any file as outdated. - -The commit 4d17, marked [skip-revcheck], will mark all its files as outdated, -needing to be updated to 6cec. - -See the difference in behaviour between two individual commits marked -'[skip-revcheck]'? -That 6cec commit is also marked '[skip-revcheck]' is -incidental. This discrepancy occurs in any sequence of commits -marked '[skip-revcheck]'. - -When the revcheck code, as now, detects that the topmost commit hash contains an -'[skip-revcheck]', it ignores this topmost commit hash, and then selects the fixed -'-2' commit hash as a base of comparison, when the file is calculated as old. - -See: -- Oldness test: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L362 -- Topmost skip: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L380 -- Hash -2:      https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L384 - -The output of -2 test, as now, is: -``` -4d17b7b4947e7819ff5036715dd706be87ae4def -6ceccac7860f382f16ac1407baf54f656e85ca0b -``` - -The code linked above splits the results on the new line, and compares the revtag -hash against the second line, 6cec in this case. But 6cec is itself marked as an -'[skip-revcheck]'. 
So an [skip-revcheck] is bumping all its file hashes into -another [skip-revcheck] commit hash... - -The proposed solution is to removing the use of the fixed -2 topmost hash when -the topmost hash is marked [skip-revcheck] into ignoring any topmost commit -hash marked [skip-revcheck], and thus selecting as an alternative comparison -hash the first topmost hash not marked as [skip-revcheck]. - -In this case, b2a2. - -So any future sequence of [skip-revcheck] commits does not cause the bumping -of hashes in all translations in the presence of a sequence of [skip-revcheck] -commits. - -TEXT; } \ No newline at end of file +require_once __DIR__ . '/XmlUtil.php'; \ No newline at end of file diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php new file mode 100644 index 000000000..f857204f6 --- /dev/null +++ b/scripts/translation/lib/backport.php @@ -0,0 +1,168 @@ +window is the base-2 log of the compression loopback window size. +Higher values (up to 15 -- 32768 bytes) yield better compression at a cost of memory, +while lower values (down to 9 -- 512 bytes) yield worse compression in a smaller memory footprint. +- Default window size is currently 15. ++ Default window size is currently 15. + +memory is a scale indicating how much work memory should be allocated. +Valid values range from 1 (minimal allocation) to 9 (maximum allocation). This memory allocation +diff --git a/install/fpm/configuration.xml b/install/fpm/configuration.xml +index 9baaf43d6f..a34700ef97 100644 +--- a/install/fpm/configuration.xml ++++ b/install/fpm/configuration.xml +@@ -805,109 +805,109 @@ + + + +- %C ++ %C + +%CPU + + +``` + +This commit must be tracked in translations? In other words, this commit +should mark the various files changed as outdated in translations? + +The current implementation on doc-base/revcheck.php would *ignore* this +commit, *not* marking these files as outdated on translations. 
+ +This is because the code searches for '[skip-revcheck]' in any position [1], +and in any line [2] of commit messages. + +[1] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L304 +[2] https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L302 + +The problem, here, is that this commit on doc-en was squashed, and by so, all +individual commit messages are concatenated in one commit message. + +``` +Remove constant tag from literal values (#3251) +* Remove constant tag from literal values +* [skip-revcheck] Fix whitespace +``` + +The solution proposed is to check for '[skip-revcheck]' mark only at the +starting of the first line on commit messages, so future squashed commits +do not cause file modifications to go untracked on translations. + +After code deduplication, open an issue to consider having a +*strict* [skip-revcheck] mode, which avoids the issue above, +by removing any mentions of the LOOSE_SKIP_REVCHECK constant. */ + +assert( LOOSE_SKIP_REVCHECK || ! LOOSE_SKIP_REVCHECK ); + +/* # FIXED_SKIP_REVCHECK + +Consider the output of: git log --oneline -- reference/ds/ds.deque.xml +``` +4d17 [skip-revcheck] Convert class markup to be compatible with DocBook 5.2 +6cec [skip-revcheck] Normalize &Constants; and &Methods; usage (#2703) +b2a2 These should include the namesapce +120c Document ArrayAccess in PHP-DS +``` + +The last two commits, each one, will mark all their included files as old +in translations, as the commit message does not contain '[skip-revcheck]'. + +The commit 6cec, marked [skip-revcheck], will not mark any file as outdated. + +The commit 4d17, marked [skip-revcheck], will mark all its files as outdated, +needing to be updated to 6cec. + +See the difference in behaviour between two individual commits marked +'[skip-revcheck]'? +That 6cec commit is also marked '[skip-revcheck]' is +incidental.
This discrepancy occurs in any sequence of commits +marked '[skip-revcheck]'. + +When the revcheck code, as now, detects that the topmost commit hash contains an +'[skip-revcheck]', it ignores this topmost commit hash, and then selects the fixed +'-2' commit hash as a base of comparison, when the file is calculated as old. + +See: +- Oldness test: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L362 +- Topmost skip: https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L380 +- Hash -2:      https://github.com/php/doc-base/blob/84532c32eb7b6d694df6cbee3622cec624709654/scripts/revcheck.php#L384 + +The output of -2 test, as now, is: +``` +4d17b7b4947e7819ff5036715dd706be87ae4def +6ceccac7860f382f16ac1407baf54f656e85ca0b +``` + +The code linked above splits the results on the new line, and compares the revtag +hash against the second line, 6cec in this case. But 6cec is itself marked as an +'[skip-revcheck]'. So an [skip-revcheck] is bumping all its file hashes into +another [skip-revcheck] commit hash... + +The proposed solution is to replace the use of the fixed -2 topmost hash when +the topmost hash is marked [skip-revcheck] with ignoring any topmost commit +hash marked [skip-revcheck], and thus selecting as an alternative comparison +hash the first topmost hash not marked as [skip-revcheck]. + +In this case, b2a2. + +So any future sequence of [skip-revcheck] commits does not cause the bumping +of hashes in all translations in the presence of a sequence of [skip-revcheck] +commits. + +After code deduplication, open an issue to consider having a +multi-skipping [skip-revcheck] mode, which avoids the issue above, +by removing any mentions of the FIXED_SKIP_REVCHECK constant. */ + +assert( FIXED_SKIP_REVCHECK || ! 
FIXED_SKIP_REVCHECK ); \ No newline at end of file From 44054355e69faf9315e1c6961dd3cfcd7fa1644d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Tue, 5 Nov 2024 15:12:38 -0300 Subject: [PATCH 09/22] Revcheck deduplication: backport couting behaviour. --- scripts/revcheck.php | 27 +++++++++++++++++------- scripts/translation/lib/RevcheckData.php | 6 +++--- scripts/translation/lib/RevcheckRun.php | 15 ++++++++----- scripts/translation/lib/backport.php | 5 ++++- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index fb6d6121e..7df233ac8 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -47,8 +47,9 @@ $lang = $argv[1]; fwrite( STDERR , "TODO\n" ); // FAST -//$data = new RevcheckRun( 'en' , $argv[1] ); +//$data = new RevcheckRun( 'en' , $argv[1] )->revData; +//TODO remove start fwrite( STDERR , "TODO\n" ); // FAST if ( ! file_exists( "FAST" ) ) { @@ -57,7 +58,9 @@ } $data = unserialize( file_get_contents ( "FAST" ) ); $data = $data->revData; +//TODO remove end +//print_debug_list( $data ); die(); print_html_all( $data ); // Output @@ -145,9 +148,9 @@ function print_html_translators( RevcheckData $data ) Files maintained - upd + upto-
date old - wip + wip sum HTML; @@ -158,7 +161,7 @@ function print_html_translators( RevcheckData $data ) if ( $person->name == "" && $person->email == "" && $person->vcs == "" ) continue; - $personSum = $person->filesUpdate + $person->filesOld + $person->filesWip; + $personSum = $person->countOk + $person->countOld + $person->countOther; print << @@ -166,9 +169,9 @@ function print_html_translators( RevcheckData $data ) {$person->email} {$person->nick} {$person->vcs} - {$person->filesUpdate} - {$person->filesOld} - {$person->filesWip} + {$person->countOk} + {$person->countOld} + {$person->countOther} {$personSum} HTML; @@ -254,7 +257,6 @@ function print_html_oldwip( RevcheckData $data ) $now = new DateTime( 'now' ); $path = null; - asort( $data->fileDetail ); foreach( $data->fileDetail as $key => $file ) { @@ -450,3 +452,12 @@ function print_html_notinen() print "

"; } } + + + +function print_debug_list( RevcheckData $data ) +{ + foreach( $data->fileDetail as $key => $file ) + print "f:$key m:{$file->maintainer} s:{$file->status->value}\n"; + die(); +} \ No newline at end of file diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index 6a775c840..63bd6217f 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -71,9 +71,9 @@ class RevcheckDataTranslator public string $nick = ""; public string $vcs = ""; - public int $filesUpdate = 0; - public int $filesOld = 0; - public int $filesWip = 0; + public int $countOk = 0; + public int $countOld = 0; + public int $countOther = 0; } class RevcheckDataFile diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 3c5bda9db..1e87b6829 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -155,7 +155,7 @@ private function calculateStatus() { $target->status = RevcheckStatus::NotInEnTree; $this->filesNotInEn[] = $target; - $this->addData( $target ); + $this->addData( $target , $target->revtag ); } } } @@ -185,13 +185,18 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul switch( $info->status ) { case RevcheckStatus::TranslatedOk: - $translator->filesUpdate++; + $translator->countOk++; break; case RevcheckStatus::TranslatedOld: - $translator->filesOld++; + $translator->countOld++; break; - default: - $translator->filesWip++; +// default: // STATUS_COUNT_MISMATCH correct +// $translator->countOther++; + case RevcheckStatus::NotInEnTree: // STATUS_COUNT_MISMATCH backported behaviour + break; + default: // STATUS_COUNT_MISMATCH backported behaviour + if ( $revtag->status != "ready" ); + $translator->countOther++; break; } } diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index f857204f6..3dc38de53 100644 --- 
a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -28,7 +28,10 @@ To make the mismatch smaller, the "wip" column in Translators was changed to "misc", and so any status other than "ok" and "old" -was added here. */ +was added here. + +Also, NotInEnTree is missing on first case, and files +in this situation goes uncounted... */ assert( STATUS_COUNT_MISMATCH || ! STATUS_COUNT_MISMATCH ); From 31cf0db496fc971d924c108d2928a0ff6226cdf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Tue, 5 Nov 2024 16:02:50 -0300 Subject: [PATCH 10/22] Revcheck deduplication: more backport couting woes. --- scripts/translation/lib/RevcheckRun.php | 25 ++++++++++++++++--------- scripts/translation/lib/backport.php | 5 ++++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 1e87b6829..1d11b127c 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -90,7 +90,7 @@ private function calculateStatus() { $source->status = RevcheckStatus::RevTagProblem; $this->filesRevtagProblem[] = $source; - $this->addData( $source , null ); + $this->addData( $source , $target->revtag ); continue; } @@ -184,20 +184,27 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul switch( $info->status ) { - case RevcheckStatus::TranslatedOk: + case RevcheckStatus::TranslatedOk: // ready and synced $translator->countOk++; break; - case RevcheckStatus::TranslatedOld: + case RevcheckStatus::TranslatedOld: // ready and outdated $translator->countOld++; break; -// default: // STATUS_COUNT_MISMATCH correct -// $translator->countOther++; - case RevcheckStatus::NotInEnTree: // STATUS_COUNT_MISMATCH backported behaviour + // STATUS_COUNT_MISMATCH count correct + // default: // all other cases + // $translator->countOther++; + + // STATUS_COUNT_MISMATCH backported behaviour + case 
RevcheckStatus::RevTagProblem: // STATUS_COUNT_MISMATCH backported behaviour + $translator->countOld++; // RevTagProblem into Old (generated diff link fails) break; - default: // STATUS_COUNT_MISMATCH backported behaviour - if ( $revtag->status != "ready" ); - $translator->countOther++; + case RevcheckStatus::NotInEnTree: // STATUS_COUNT_MISMATCH backported behaviour + break; // Not counted, but files are listed anyways... + default: + if ( $revtag->status != "ready" ); // STATUS_COUNT_MISMATCH backported behaviour + $translator->countOther++; // The exception of all cases, and also not ready. break; + // STATUS_COUNT_MISMATCH backported behaviour } } } diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index 3dc38de53..4ada320b6 100644 --- a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -31,7 +31,10 @@ was added here. Also, NotInEnTree is missing on first case, and files -in this situation goes uncounted... */ +in this situation goes uncounted... + +Also, RevTagProblem is counted towards as Old, but it +is impossible to generate diffs with invalid hashes... */ assert( STATUS_COUNT_MISMATCH || ! STATUS_COUNT_MISMATCH ); From 324f48c3f65f64bf628b84163fdd6cda3b02085c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Tue, 5 Nov 2024 17:52:57 -0300 Subject: [PATCH 11/22] Revcheck deduplication: notinen, revtag, unstranslated. 
--- scripts/revcheck.php | 183 ++++++++++++------------ scripts/translation/lib/RevcheckRun.php | 2 + 2 files changed, 95 insertions(+), 90 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index 7df233ac8..df142105f 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -70,10 +70,10 @@ function print_html_all( RevcheckData $data ) print_html_header( $data ); print_html_translators( $data ); print_html_oldwip( $data ); - //print_html_notinen(); - //print_html_misstags( $enFiles, $trFiles, $lang ); - //print_html_untranslated( $enFiles ); - //print_html_footer(); + print_html_notinen( $data ); + print_html_revtag( $data ); + print_html_untranslated( $data ); + print_html_footer(); } function print_html_header( RevcheckData $data ) @@ -121,7 +121,7 @@ function print_html_menu( string $href ) | File summary | Outdated Files | Not in EN tree -| Missing revision numbers +| Missing or invalid revtag | Untranslated files

HTML; @@ -232,7 +232,6 @@ function print_html_oldwip( RevcheckData $data ) $total = $data->fileSummary[ RevcheckStatus::TranslatedOld->value ]; $total += $data->fileSummary[ RevcheckStatus::TranslatedWip->value ]; - $total += $data->fileSummary[ RevcheckStatus::RevTagProblem->value ]; if ( $total == 0 ) { print "

Hooray! There is no files to update, nice work!

\n\n"; @@ -264,7 +263,6 @@ function print_html_oldwip( RevcheckData $data ) { case RevcheckStatus::TranslatedOld: case RevcheckStatus::TranslatedWip: - case RevcheckStatus::RevTagProblem: break; default: continue 2; @@ -285,8 +283,6 @@ function print_html_oldwip( RevcheckData $data ) $d2 = "https://doc.php.net/revcheck.php?p=plain&lang={$ll}&hbp={$file->hashRvtg}&f=$key&c=on"; $nm = "{$file->name} [colored]"; - if ( $file->status == RevcheckStatus::RevTagProblem ) // $file->hashRvtg empty or invalid - $nm = $file->name; $h1 = "{$file->hashLast}"; $h2 = "{$file->hashRvtg}"; @@ -317,86 +313,129 @@ function print_html_oldwip( RevcheckData $data ) print "

\n\n"; } -function print_html_misstags( $enFiles, $trFiles, $lang ) +function print_html_notinen( RevcheckData $data ) { - print_html_menu("misstags"); + print_html_menu("notinen"); - GLOBAL $files_misstags; - if ($files_misstags == 0) + if ( $data->fileSummary[ RevcheckStatus::TranslatedWip->value ] == 0 ) { - echo '

Good, all files contain revision numbers.

'; - } else { + print "

Hooray! There is no files to update, nice work!

\n\n"; + return; + } + + print << + + Files which is not present in source tree + Size kB + +HTML; + $header = null; + foreach ( $data->fileDetail as $file ) + { + if ( $file->status != RevcheckStatus::NotInEnTree ) + continue; + + if ( $header !== $file->path ) + { + $header = $file->path; + print " /$header"; + } + + $name = $file->name; + $size = round( $file->size / 1024 ); + print << + {$name} + {$size} + +HTML; + } + + print "

\n\n"; +} + +function print_html_revtag( RevcheckData $data ) +{ + print_html_menu("revtag"); + if ( $data->fileSummary[ RevcheckStatus::RevTagProblem->value ] == 0 ) + { + echo "

Good, all files contain valid revtags.

\n\n"; + return; + } + + echo << - Files without EN-Revision number ($files_misstags files) - Commit hash - Sizes in kB + Files with invalid or missing revision tags + Size kB -en$langdiff HTML; - $last_path = null; - asort($trFiles); - foreach ($trFiles as $key => $tr) + $last_path = null; + foreach ( $data->fileDetail as $file ) + { + if ( $file->status != RevcheckStatus::RevTagProblem ) + continue; + + if ( $last_path != $file->path ) { - if ( $tr->syncStatus != FileStatusEnum::RevTagProblem ) - continue; - - $en = $enFiles[ $key ]; - - if ( $last_path != $tr->path ) - { - $path = $tr->path == '' ? '/' : $tr->path; - echo "$path"; - $last_path = $tr->path; - } - $diff = intval($en->size - $tr->size); - echo "{$tr->name}{$en->hash}{$en->size}{$tr->size}$diff"; + $path = $file->path == '' ? '/' : $file->path; + echo "$path"; + $last_path = $file->path; } - echo ''; + $size = round( $file->size / 1024 ); + echo "{$file->name}{$size}"; } + echo ''; } -function print_html_untranslated($enFiles) +function print_html_untranslated( RevcheckData $data ) { - global $files_untranslated; - $exists = false; - if (!$files_untranslated) return; print_html_menu("untranslated"); + if ( $data->fileSummary[ RevcheckStatus::Untranslated->value ] == 0 ) + { + echo "

No file left untranslated!

\n\n"; + return; + } + print << - Untranslated files ($files_untranslated files): - Commit hash + Untranslated files + Last hash kb HTML; $path = null; - asort($enFiles); - foreach( $enFiles as $key => $en ) + foreach ( $data->fileDetail as $key => $file ) { - if ( $en->syncStatus != FileStatusEnum::Untranslated ) + if ( $file->status != RevcheckStatus::Untranslated ) continue; - if ( $path !== $en->path ) + + if ( $path !== $file->path ) { - $path = $en->path; - $path2 = $path == '' ? '/' : $path; - print " $path2"; + $path = $file->path; + $header = $path == '' ? '/' : $path; + print " /$header"; } - $size = $en->size < 1024 ? 1 : floor( $en->size / 1024 ); - print <<name; + $hash = $file->hashLast; + $href = "https://github.com/php/doc-en/blob/{$hash}/$key"; + $size = round( $file->size / 1024 ); + print << - $en->name - $en->hash + $name + $hash $size HTML; } - print "\n"; + print "\n\n"; } function print_html_footer() @@ -418,42 +457,6 @@ function print_html_footer() HTML; } -function print_html_notinen() -{ - global $oldfiles, $notinen_count; - print_html_menu("notinen"); - $exists = false; - if (!$notinen_count) - { - print "

Good, it seems that this translation doesn't contain any file which is not present in English tree.

\n"; - } else { - print << - - Files which is not present in English tree. ($notinen_count files) - Size in kB - -HTML; - $path = null; - foreach( $oldfiles as $key => $en ) - { - if ( $path !== $en->path ) - { - $path = $en->path; - print " /$path"; - } - print << - $en->name - $en->size - -HTML; - } -print "

"; - } -} - - function print_debug_list( RevcheckData $data ) { diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 1d11b127c..79e384b3c 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -158,6 +158,8 @@ private function calculateStatus() $this->addData( $target , $target->revtag ); } } + + asort( $this->revData->fileDetail ); } private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = null ) : void From 60b5f1e6f5090bdbcad89cd830cb7b5fc7dd55e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Wed, 6 Nov 2024 14:00:51 -0300 Subject: [PATCH 12/22] Revcheck deduplication: ignore, regex, xml. --- scripts/translation/lib/RevcheckIgnore.php | 15 +++++++------ scripts/translation/lib/RevcheckRun.php | 6 ++--- scripts/translation/lib/RevtagParser.php | 6 ++--- scripts/translation/lib/backport.php | 26 ++++++++++++++++++---- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/scripts/translation/lib/RevcheckIgnore.php b/scripts/translation/lib/RevcheckIgnore.php index a2a36d026..f9dc57089 100644 --- a/scripts/translation/lib/RevcheckIgnore.php +++ b/scripts/translation/lib/RevcheckIgnore.php @@ -42,7 +42,7 @@ public static function ignore( $filename ) : bool if ( str_contains( $filename , "/versions.xml" ) ) return true; - // Only in English + // Only in English, autogenerated, marked not translatable if ( $filename == "contributors.ent" ) return true; @@ -50,18 +50,19 @@ public static function ignore( $filename ) : bool return true; if ( $filename == "appendices/license.xml" ) return true; - if ( $filename == "appendices/license.xml" ) - return true; if ( $filename == "appendices/extensions.xml" ) return true; if ( $filename == "appendices/reserved.constants.xml" ) return true; if ( $filename == "reference/datetime/timezones.xml" ) return true; - if ( str_starts_with( $filename , 'chmonly/') ) - return true; - if 
( str_ends_with( $filename , '/book.developer.xml') ) - return true; + + if ( IGNORE_EXTENSIONS_XML ) + if ( str_ends_with( $filename , '/extension.xml') ) // wrong + return true; + if ( IGNORE_CHMONLY_DIR ) + if ( str_starts_with( $filename , 'chmonly/') ) // possibly wrong + return true; // Only in translations diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 79e384b3c..4ba907b71 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -217,13 +217,13 @@ private function parseTranslationXml() : void $this->revData->lang = $this->targetDir; $this->revData->date = date("r"); - $xml = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); + $dom = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); $this->revData->intro = - $xml->getElementsByTagName( 'intro' )[0]->textContent + $dom->saveXML( $dom->getElementsByTagName( 'intro' )[0] ) ?? "No intro available for the {$lang} translation of the manual."; - $persons = $xml->getElementsByTagName( 'person' ); + $persons = $dom->getElementsByTagName( 'person' ); foreach( $persons as $person ) { $nick = $person->getAttribute( 'nick' ); diff --git a/scripts/translation/lib/RevtagParser.php b/scripts/translation/lib/RevtagParser.php index a04db2fd1..cae5eada0 100644 --- a/scripts/translation/lib/RevtagParser.php +++ b/scripts/translation/lib/RevtagParser.php @@ -67,11 +67,11 @@ public static function parseComment( DOMNode $node , RevtagInfo $ret , $filename if ( str_starts_with( $text , "EN-" ) ) { - // /EN-Revision:\s*(\S+)\s*Maintainer:\s*(\S+)\s*Status:\s*(\S+)/ // restrict maintainer without spaces - // /EN-Revision:\s*(\S+)\s*Maintainer:\s(.*?)\sStatus:\s*(\S+)/ // accepts maintainer with spaces + // /EN-Revision:\s*(\S+)\s*Maintainer:\s*(\S+)\s*Status:\s*(\S+)/ // restrict maintainer without spaces + // /EN-Revision:\s*(\S+)\s*Maintainer:\s(.*?)\sStatus:\s*(\S+)/ // accepts maintainer with spaces $match = 
array(); - $regex = "/EN-Revision:\s*(\S+)\s*Maintainer:\s*(\S+)\s*Status:\s*(\S+)/"; + $regex = "/EN-Revision:\s*(\S+)\s*Maintainer:\s(.*?)\sStatus:\s*(\S+)/"; if ( preg_match( $regex , $text , $match ) ) { $ret->revision = trim( $match[1] ); diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index 4ada320b6..e684eda04 100644 --- a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -9,12 +9,30 @@ // Constants +const IGNORE_CHMONLY_DIR = true; +const IGNORE_EXTENSIONS_XML = true; const STATUS_COUNT_MISMATCH = true; const LOOSE_SKIP_REVCHECK = true; const FIXED_SKIP_REVCHECK = true; // Documentation +/* # IGNORE_CHMONLY_DIR + +The chmonly/ dir contains files that appears to be +translatable. See recent efforts to re-enabling +CHM build: https://github.com/php/doc-base/pull/163 +*/ + +/* # IGNORE_EXTENSIONS_XML + +The actual revcheck ignores any files called extensions.xml, +but are at least two of translated files with this name. + +- appendices/migration56/extensions.xml +- install/windows/legacy/extensions.xml +*/ + /* # STATUS_COUNT_MISMATCH The actual revcheck counts "outdated" files in two different ways; @@ -31,14 +49,14 @@ was added here. Also, NotInEnTree is missing on first case, and files -in this situation goes uncounted... +in this situation goes uncounted. -Also, RevTagProblem is counted towards as Old, but it -is impossible to generate diffs with invalid hashes... */ +Also, RevTagProblem is counted towards as Old, but files +are show in revtag missing/problem list, and is +impossible to generate diffs with invalid hashes... */ assert( STATUS_COUNT_MISMATCH || ! 
STATUS_COUNT_MISMATCH ); - /* # LOOSE_SKIP_REVCHECK Consider the output of: git show f80105b4fc1196bd8d5fecb98d686b580b1ff65d From a0ddca658b801d4023921e9be461672d69b2d2f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Wed, 6 Nov 2024 14:53:59 -0300 Subject: [PATCH 13/22] Revcheck deduplication: adds/dels, clean up. --- scripts/revcheck.php | 27 +++++------------------ scripts/translation/lib/GitDiffParser.php | 26 ++++++++++++++++++++-- scripts/translation/lib/RevcheckRun.php | 13 +++++++---- scripts/translation/lib/all.php | 2 +- 4 files changed, 40 insertions(+), 28 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index df142105f..495acf7c4 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -42,25 +42,10 @@ exit; } -fwrite( STDERR , "TODO\n" ); // notinen -fwrite( STDERR , "TODO\n" ); // source|targetDir -> Lang - $lang = $argv[1]; -fwrite( STDERR , "TODO\n" ); // FAST -//$data = new RevcheckRun( 'en' , $argv[1] )->revData; +$revc = new RevcheckRun( 'en' , $argv[1] ); +$data = $revc->revData; -//TODO remove start -fwrite( STDERR , "TODO\n" ); // FAST -if ( ! file_exists( "FAST" ) ) -{ - $data = new RevcheckRun( 'en' , $argv[1] ); - file_put_contents( "FAST" , serialize( $data ) ); -} -$data = unserialize( file_get_contents ( "FAST" ) ); -$data = $data->revData; -//TODO remove end - -//print_debug_list( $data ); die(); print_html_all( $data ); // Output @@ -250,7 +235,7 @@ function print_html_oldwip( RevcheckData $data ) en - {$data->lang}} + {$data->lang} \n HTML; @@ -319,7 +304,7 @@ function print_html_notinen( RevcheckData $data ) if ( $data->fileSummary[ RevcheckStatus::TranslatedWip->value ] == 0 ) { - print "

Hooray! There is no files to update, nice work!

\n\n"; + print "

Good, it seems that this translation doesn't contain any file which is not present in source tree.

\n\n"; return; } @@ -339,7 +324,7 @@ function print_html_notinen( RevcheckData $data ) if ( $header !== $file->path ) { $header = $file->path; - print " /$header"; + print " $header"; } $name = $file->name; @@ -419,7 +404,7 @@ function print_html_untranslated( RevcheckData $data ) { $path = $file->path; $header = $path == '' ? '/' : $path; - print " /$header"; + print " $header"; } $name = $file->name; diff --git a/scripts/translation/lib/GitDiffParser.php b/scripts/translation/lib/GitDiffParser.php index 9df98ac07..169c13c16 100644 --- a/scripts/translation/lib/GitDiffParser.php +++ b/scripts/translation/lib/GitDiffParser.php @@ -21,6 +21,28 @@ class GitDiffParser { - public static function parseNumstatInto( string $dir , RevcheckFileInfo $file ) - {} + public static function parseAddsDels( string $chdir , RevcheckDataFile $file ) + { + $cwd = getcwd(); + chdir( $chdir ); + + $hash = $file->hashRvtg; + $name = $file->path == "" ? $file->name : $file->path . "/" . $file->name; + + $hash = escapeshellarg( $hash ); + $name = escapeshellarg( $name ); + + $output = `git diff --numstat $hash -- $name`; + if ( $output ) + { + preg_match( '/(\d+)\s+(\d+)/' , $output , $matches ); + if ( $matches ) + { + $file->adds = $matches[1]; + $file->dels = $matches[2]; + } + } + + chdir( $cwd ); + } } diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 4ba907b71..120e10f49 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -121,8 +121,6 @@ private function calculateStatus() // TranslatedOld // TranslatedWip - GitDiffParser::parseNumstatInto( $this->sourceDir , $source ); - if ( $target->revtag->status == "ready" ) { if ( FIXED_SKIP_REVCHECK && $source->diff == "skip" && TestFixedHashMinusTwo( $source->file , $targetHash ) ) @@ -184,7 +182,7 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul $translator = $this->revData->getTranslator( 
$revtag->maintainer ); - switch( $info->status ) + switch( $info->status ) // counts { case RevcheckStatus::TranslatedOk: // ready and synced $translator->countOk++; @@ -208,6 +206,13 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul break; // STATUS_COUNT_MISMATCH backported behaviour } + + switch( $info->status ) // adds,dels + { + case RevcheckStatus::TranslatedOld: + case RevcheckStatus::TranslatedWip: + GitDiffParser::parseAddsDels( $this->sourceDir , $file ); + } } } @@ -252,4 +257,4 @@ function TestFixedHashMinusTwo($filename, $hash) :bool $hashes = explode ( "\n" , `git log -2 --format=%H -- {$filename}` ); chdir( $cwd ); return ( $hashes[1] == $hash ); // $trFile->hash -} \ No newline at end of file +} diff --git a/scripts/translation/lib/all.php b/scripts/translation/lib/all.php index e404a9a83..62ec3177d 100644 --- a/scripts/translation/lib/all.php +++ b/scripts/translation/lib/all.php @@ -35,4 +35,4 @@ require_once __DIR__ . '/RevcheckIgnore.php'; require_once __DIR__ . '/RevcheckRun.php'; require_once __DIR__ . '/RevtagParser.php'; -require_once __DIR__ . '/XmlUtil.php'; \ No newline at end of file +require_once __DIR__ . '/XmlUtil.php'; From 55bae33d8d646c840d5312e79532e8efe9869f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Thu, 7 Nov 2024 11:37:12 -0300 Subject: [PATCH 14/22] Revcheck deduplicatoin: genrevdb.php. 
--- scripts/translation/lib/RevcheckData.php | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index 63bd6217f..84344b426 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -62,6 +62,17 @@ public function getTranslator( string $nick ) } return $translator; } + + public function getSummaryLabels() : array + { + $ret[ RevcheckStatus::TranslatedOk->value ] = "Up to date files"; + $ret[ RevcheckStatus::TranslatedOld->value ] = "Outdated files"; + $ret[ RevcheckStatus::TranslatedWip->value ] = "Work in progress"; + $ret[ RevcheckStatus::RevTagProblem->value ] = "Revision tag missing/problem"; + $ret[ RevcheckStatus::NotInEnTree->value ] = "Not in EN tree"; + $ret[ RevcheckStatus::Untranslated->value ] = "Available for translation"; + return $ret; + } } class RevcheckDataTranslator From 1ab908cbd95a8866bb5f419220b51c762014caab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Thu, 7 Nov 2024 12:13:36 -0300 Subject: [PATCH 15/22] Revcheck deduplicatoin: genrevdb.php. --- scripts/translation/genrevdb.php | 250 +++++++++++++++++++++++++++++++ 1 file changed, 250 insertions(+) create mode 100644 scripts/translation/genrevdb.php diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php new file mode 100644 index 000000000..c56811197 --- /dev/null +++ b/scripts/translation/genrevdb.php @@ -0,0 +1,250 @@ + | + * +----------------------------------------------------------------------+ + * | Description: Check format for revtags and credits on XML comments. | + * +----------------------------------------------------------------------+ + */ + +require_once __DIR__ . 
'/lib/all.php'; + +if ( count( $argv ) < 3 || in_array( '--help' , $argv ) || in_array( '-h' , $argv ) ) +{ + fwrite( STDERR , "\nUsage: {$argv[0]} [file.db] [lang1,langN]\n\n" ); + return; +} + +$timeStart = new \DateTime; +$dbpath = $argv[1]; +$langs = explode( ',' , $argv[2] ); + +consolelog( "Creating revdata database $dbpath for languages $argv[2]." ); + +$db = db_create( $dbpath ); +foreach( $langs as $lang ) + generate( $db , $lang ); + +consolelog( "Revdata database $dbpath complete." ); +exit; + + + +function generate( SQLite3 $db , string $lang ) +{ + $cwd = getcwd(); + if ( ! is_dir( $lang ) ) + { + consolelog( "Error: '$cwd/$lang' doesn't exist. Skipped." ); + return; + } + if ( ! is_file( "$lang/translation.xml" ) ) + { + consolelog( "Error: '$cwd/$lang' doesn't contains translation.xml. Skipped." ); + return; + } + + try + { + consolelog( "Language $lang run" ); + + $revcheck = new RevcheckRun( 'en' , $lang ); + $data = $revcheck->revData; + + $db->exec( 'BEGIN TRANSACTION' ); + + db_insert( $db , "languages" , $data->lang , $data->intro ); + + foreach( $data->translators as $translator ) + if ( $translator->nick != "" ) + db_insert( $db , "translators", $data->lang + , $translator->name + , $translator->nick + , $translator->email + , $translator->vcs + , $translator->countOk + , $translator->countOld + , $translator->countOther + ); + + foreach( $data->fileDetail as $file ) + if ( $translator->nick != "" ) + db_insert( $db , "files", $data->lang + , $file->path + , $file->name + , $file->size + , $file->days + , $file->adds + , $file->dels + , $file->status->value + , $file->maintainer + , $file->completion + , $file->hashLast + , $file->hashDiff + , $file->hashRvtg + ); + + $filesTotal = 0; + foreach( $data->fileSummary as $count ) + $filesTotal += $count; + $labels = $data->getSummaryLabels(); + foreach( $data->fileSummary as $status => $count ) + db_insert( $db , "summary", $data->lang + , $status + , $labels[ $status ] + , $count + , 
number_format( $count / $filesTotal * 100 , 2 ) . "%" + ); + + $db->exec( 'COMMIT TRANSACTION' ); + consolelog_timed( "Language $lang done" ); + } + catch ( Exception $e ) + { + $db->exec( 'ROLLBACK TRANSACTION' ); + consolelog( "Throw: " . $e->getMessage() ); + exit; + } +} + +function db_insert( SQLite3 $db , string $table , ... $values ) : void +{ + $dml = "INSERT INTO $table VALUES ("; + $cmm = ""; + foreach( $values as $v ) + { + $dml .= "$cmm?"; + $cmm = ","; + } + $dml .= ");\n"; + + $cmd = $db->prepare( $dml ); + if ( ! $cmd ) + { + consolelog_error( "Error: Prepare failed." , $db ); + throw new \Exception; + } + + $idx = 0; + foreach( $values as $val ) + { + $idx++; + $cmd->bindValue( $idx , $val ); + } + + $sql = $cmd->getSQL( true ); + + $res = $cmd->execute(); + if ( ! $res ) + { + consolelog_error( "Error: '$sql'" , $db ); + throw new \Exception; + } +} + +function db_create( $path ) : SQLite3 +{ + if ( is_file ( $path ) ) + { + consolelog( "Previous database file found, deleting." ); + if ( ! @ unlink ( $path ) ) + { + consolelog( "Error: Can't remove temporary database." ); + exit; + } + } + + $ddl = <<exec( $ddl ) ) + { + consolelog_error( "Error: Database creation failed." , $db ); + exit; + } + return $db; + } + catch ( Exception $e ) + { + consolelog( "Throw: " . $e->getMessage() ); + exit; + } +} + +function consolelog( $message ) : void +{ + $time = (new \DateTime())->format('Y-m-d H:i'); + echo "[$time] $message\n"; +} + +function consolelog_timed( $message ) : void +{ + static $lastMark = null; + if ( $lastMark == null ) + { + global $timeStart; + $lastMark = $timeStart; + } + $seconds = (new \DateTime)->getTimestamp() - $lastMark->getTimestamp(); + $lastMark = new \DateTime; + $time = $lastMark->format('Y-m-d H:i'); + echo sprintf( "[%s] %s (elapsed %.02fs)\n", $time, $message, $seconds ); +} + +function consolelog_error( string $message , SQLite3 $db ) : void +{ + consolelog( $message ); + consolelog( 'SQLite3: (' . $db->lastErrorCode() . 
') ' . $db->lastErrorMsg() ); +} From a08561f08c70ee4f8ea9bd16e7146e8fd11d1f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Thu, 7 Nov 2024 16:00:33 -0300 Subject: [PATCH 16/22] More backport and small fixes. --- scripts/revcheck.php | 2 +- scripts/translation/genrevdb.php | 2 +- scripts/translation/lib/RevcheckData.php | 2 +- scripts/translation/lib/RevcheckIgnore.php | 4 ++-- scripts/translation/lib/backport.php | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index 495acf7c4..c091775a2 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -302,7 +302,7 @@ function print_html_notinen( RevcheckData $data ) { print_html_menu("notinen"); - if ( $data->fileSummary[ RevcheckStatus::TranslatedWip->value ] == 0 ) + if ( $data->fileSummary[ RevcheckStatus::NotInEnTree->value ] == 0 ) { print "

Good, it seems that this translation doesn't contain any file which is not present in source tree.

\n\n"; return; diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php index c56811197..765d5af7f 100644 --- a/scripts/translation/genrevdb.php +++ b/scripts/translation/genrevdb.php @@ -29,7 +29,7 @@ $dbpath = $argv[1]; $langs = explode( ',' , $argv[2] ); -consolelog( "Creating revdata database $dbpath for languages $argv[2]." ); +consolelog( "Creating revdata database $dbpath for languages: $argv[2]." ); $db = db_create( $dbpath ); foreach( $langs as $lang ) diff --git a/scripts/translation/lib/RevcheckData.php b/scripts/translation/lib/RevcheckData.php index 84344b426..d3e19b977 100644 --- a/scripts/translation/lib/RevcheckData.php +++ b/scripts/translation/lib/RevcheckData.php @@ -47,8 +47,8 @@ public function __construct() public function addFile( string $key , RevcheckDataFile $file ) { - $this->fileSummary[ $file->status->value ]++; $this->fileDetail[ $key ] = $file; + $this->fileSummary[ $file->status->value ]++; } public function getTranslator( string $nick ) diff --git a/scripts/translation/lib/RevcheckIgnore.php b/scripts/translation/lib/RevcheckIgnore.php index f9dc57089..bb37eabda 100644 --- a/scripts/translation/lib/RevcheckIgnore.php +++ b/scripts/translation/lib/RevcheckIgnore.php @@ -58,10 +58,10 @@ public static function ignore( $filename ) : bool return true; if ( IGNORE_EXTENSIONS_XML ) - if ( str_ends_with( $filename , '/extension.xml') ) // wrong + if ( str_ends_with( $filename , '/extensions.xml') ) // track/count backport return true; if ( IGNORE_CHMONLY_DIR ) - if ( str_starts_with( $filename , 'chmonly/') ) // possibly wrong + if ( str_starts_with( $filename , 'chmonly/') ) // track/count backport return true; // Only in translations diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index e684eda04..455b6e16e 100644 --- a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -27,10 +27,10 @@ /* # IGNORE_EXTENSIONS_XML The actual revcheck ignores 
any files called extensions.xml, -but are at least two of translated files with this name. +but are at least two of files with this name. -- appendices/migration56/extensions.xml -- install/windows/legacy/extensions.xml +- appendices/migration56/extensions.xml (translatable) +- install/windows/legacy/extensions.xml (deleted on en/, exists untracked on translations) */ /* # STATUS_COUNT_MISMATCH From d254cede8eac11bdd07a9bd3a68a003785490526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Thu, 7 Nov 2024 17:27:54 -0300 Subject: [PATCH 17/22] Review changes/fixes. --- scripts/translation/genrevdb.php | 6 ++++-- scripts/translation/lib/RevcheckRun.php | 13 ++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php index 765d5af7f..7b9254b17 100644 --- a/scripts/translation/genrevdb.php +++ b/scripts/translation/genrevdb.php @@ -27,9 +27,11 @@ $timeStart = new \DateTime; $dbpath = $argv[1]; -$langs = explode( ',' , $argv[2] ); +$langs = array(); +for( $idx = 2 ; $idx < count( $argv ) ; $idx++ ) + $langs[] = $argv[ $idx ]; -consolelog( "Creating revdata database $dbpath for languages: $argv[2]." ); +consolelog( "Creating revdata database $dbpath for languages: " . implode( ',', $langs ) . '.'); $db = db_create( $dbpath ); foreach( $langs as $lang ) diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index 120e10f49..b9164b2ee 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -224,9 +224,16 @@ private function parseTranslationXml() : void $dom = XmlUtil::loadFile( $this->targetDir . '/translation.xml' ); - $this->revData->intro = - $dom->saveXML( $dom->getElementsByTagName( 'intro' )[0] ) - ?? "No intro available for the {$lang} translation of the manual."; + $tag = $dom->getElementsByTagName( 'intro' )[0] ?? 
null; + if ( $tag == null ) + $intro = "No intro available for the {$this->targetDir} translation of the manual."; + else + { + $intro = ""; + foreach( $tag->childNodes as $node ) + $intro .= $dom->saveXML( $node ); + } + $this->revData->intro = $intro; $persons = $dom->getElementsByTagName( 'person' ); foreach( $persons as $person ) From 117a61d373cdea4cc3eae49fcd5142c7001b6b06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Thu, 7 Nov 2024 17:32:49 -0300 Subject: [PATCH 18/22] Better log messages. --- scripts/translation/genrevdb.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php index 7b9254b17..f89f85d62 100644 --- a/scripts/translation/genrevdb.php +++ b/scripts/translation/genrevdb.php @@ -58,7 +58,7 @@ function generate( SQLite3 $db , string $lang ) try { - consolelog( "Language $lang run" ); + consolelog( "Language $lang started." ); $revcheck = new RevcheckRun( 'en' , $lang ); $data = $revcheck->revData; @@ -109,7 +109,7 @@ function generate( SQLite3 $db , string $lang ) ); $db->exec( 'COMMIT TRANSACTION' ); - consolelog_timed( "Language $lang done" ); + consolelog_timed( "Language $lang finished." ); } catch ( Exception $e ) { From 3eb4ef5e8e741d9ba6df5098a9dd268a322bb857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Fri, 8 Nov 2024 11:40:13 -0300 Subject: [PATCH 19/22] Remove unrelated filter. 
--- scripts/translation/genrevdb.php | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/scripts/translation/genrevdb.php b/scripts/translation/genrevdb.php index f89f85d62..aa87650a6 100644 --- a/scripts/translation/genrevdb.php +++ b/scripts/translation/genrevdb.php @@ -80,21 +80,20 @@ function generate( SQLite3 $db , string $lang ) ); foreach( $data->fileDetail as $file ) - if ( $translator->nick != "" ) - db_insert( $db , "files", $data->lang - , $file->path - , $file->name - , $file->size - , $file->days - , $file->adds - , $file->dels - , $file->status->value - , $file->maintainer - , $file->completion - , $file->hashLast - , $file->hashDiff - , $file->hashRvtg - ); + db_insert( $db , "files", $data->lang + , $file->path + , $file->name + , $file->size + , $file->days + , $file->adds + , $file->dels + , $file->status->value + , $file->maintainer + , $file->completion + , $file->hashLast + , $file->hashDiff + , $file->hashRvtg + ); $filesTotal = 0; foreach( $data->fileSummary as $count ) From 31fc969b3e03d8308ae5c0521d50800f117b5397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Mon, 11 Nov 2024 11:10:26 -0300 Subject: [PATCH 20/22] Tracks chmonly/ as translatable. 
--- scripts/translation/lib/RevcheckIgnore.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/scripts/translation/lib/RevcheckIgnore.php b/scripts/translation/lib/RevcheckIgnore.php index bb37eabda..8ae6c56ea 100644 --- a/scripts/translation/lib/RevcheckIgnore.php +++ b/scripts/translation/lib/RevcheckIgnore.php @@ -60,9 +60,6 @@ public static function ignore( $filename ) : bool if ( IGNORE_EXTENSIONS_XML ) if ( str_ends_with( $filename , '/extensions.xml') ) // track/count backport return true; - if ( IGNORE_CHMONLY_DIR ) - if ( str_starts_with( $filename , 'chmonly/') ) // track/count backport - return true; // Only in translations From 6d864a1863bccbecccc8d21a9fea837832c41465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Mon, 11 Nov 2024 11:16:57 -0300 Subject: [PATCH 21/22] Tracks translatable extensions.xml files. --- scripts/translation/lib/RevcheckIgnore.php | 4 ---- scripts/translation/lib/backport.php | 18 ------------------ 2 files changed, 22 deletions(-) diff --git a/scripts/translation/lib/RevcheckIgnore.php b/scripts/translation/lib/RevcheckIgnore.php index 8ae6c56ea..657d3707e 100644 --- a/scripts/translation/lib/RevcheckIgnore.php +++ b/scripts/translation/lib/RevcheckIgnore.php @@ -57,10 +57,6 @@ public static function ignore( $filename ) : bool if ( $filename == "reference/datetime/timezones.xml" ) return true; - if ( IGNORE_EXTENSIONS_XML ) - if ( str_ends_with( $filename , '/extensions.xml') ) // track/count backport - return true; - // Only in translations if ( $filename == "translation.xml" ) diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index 455b6e16e..8a36be148 100644 --- a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -9,30 +9,12 @@ // Constants -const IGNORE_CHMONLY_DIR = true; -const IGNORE_EXTENSIONS_XML = true; const STATUS_COUNT_MISMATCH = true; const LOOSE_SKIP_REVCHECK = true; const FIXED_SKIP_REVCHECK = true; // 
Documentation -/* # IGNORE_CHMONLY_DIR - -The chmonly/ dir contains files that appears to be -translatable. See recent efforts to re-enabling -CHM build: https://github.com/php/doc-base/pull/163 -*/ - -/* # IGNORE_EXTENSIONS_XML - -The actual revcheck ignores any files called extensions.xml, -but are at least two of files with this name. - -- appendices/migration56/extensions.xml (translatable) -- install/windows/legacy/extensions.xml (deleted on en/, exists untracked on translations) -*/ - /* # STATUS_COUNT_MISMATCH The actual revcheck counts "outdated" files in two different ways; From ddbfd62a1bf2aaa418178471f71cf13820d4fe64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20L=20F=20S=20Bacci?= Date: Mon, 11 Nov 2024 11:30:29 -0300 Subject: [PATCH 22/22] Counts ok/old exclusively. --- scripts/revcheck.php | 8 ++++---- scripts/translation/lib/RevcheckRun.php | 20 ++++---------------- scripts/translation/lib/backport.php | 25 ------------------------- 3 files changed, 8 insertions(+), 45 deletions(-) diff --git a/scripts/revcheck.php b/scripts/revcheck.php index c091775a2..2816ee7d0 100644 --- a/scripts/revcheck.php +++ b/scripts/revcheck.php @@ -91,8 +91,8 @@ function print_html_header( RevcheckData $data ) HTML; } @@ -133,9 +133,9 @@ function print_html_translators( RevcheckData $data ) Files maintained - upto-
date + ok old - wip + misc sum HTML; diff --git a/scripts/translation/lib/RevcheckRun.php b/scripts/translation/lib/RevcheckRun.php index b9164b2ee..dc3c2ced6 100644 --- a/scripts/translation/lib/RevcheckRun.php +++ b/scripts/translation/lib/RevcheckRun.php @@ -182,29 +182,17 @@ private function addData( RevcheckFileInfo $info , RevtagInfo|null $revtag = nul $translator = $this->revData->getTranslator( $revtag->maintainer ); - switch( $info->status ) // counts + switch( $info->status ) { - case RevcheckStatus::TranslatedOk: // ready and synced + case RevcheckStatus::TranslatedOk: $translator->countOk++; break; - case RevcheckStatus::TranslatedOld: // ready and outdated + case RevcheckStatus::TranslatedOld: $translator->countOld++; break; - // STATUS_COUNT_MISMATCH count correct - // default: // all other cases - // $translator->countOther++; - - // STATUS_COUNT_MISMATCH backported behaviour - case RevcheckStatus::RevTagProblem: // STATUS_COUNT_MISMATCH backported behaviour - $translator->countOld++; // RevTagProblem into Old (generated diff link fails) - break; - case RevcheckStatus::NotInEnTree: // STATUS_COUNT_MISMATCH backported behaviour - break; // Not counted, but files are listed anyways... default: - if ( $revtag->status != "ready" ); // STATUS_COUNT_MISMATCH backported behaviour - $translator->countOther++; // The exception of all cases, and also not ready. 
+ $translator->countOther++; break; - // STATUS_COUNT_MISMATCH backported behaviour } switch( $info->status ) // adds,dels diff --git a/scripts/translation/lib/backport.php b/scripts/translation/lib/backport.php index 8a36be148..169c781f4 100644 --- a/scripts/translation/lib/backport.php +++ b/scripts/translation/lib/backport.php @@ -9,36 +9,11 @@ // Constants -const STATUS_COUNT_MISMATCH = true; const LOOSE_SKIP_REVCHECK = true; const FIXED_SKIP_REVCHECK = true; // Documentation -/* # STATUS_COUNT_MISMATCH - -The actual revcheck counts "outdated" files in two different ways; - -- Only TranslatedOld: - https://github.com/php/doc-base/blob/747c53bf8ec72f27ac1a83ba91bcc390eea2e46a/scripts/revcheck.php#L603 -- TranslatedOld + RevTagProblem: - https://github.com/php/doc-base/blob/747c53bf8ec72f27ac1a83ba91bcc390eea2e46a/scripts/revcheck.php#L134 - -This causes a mismatchs between translators totals and file summary. - -To make the mismatch smaller, the "wip" column in Translators was -changed to "misc", and so any status other than "ok" and "old" -was added here. - -Also, NotInEnTree is missing on first case, and files -in this situation goes uncounted. - -Also, RevTagProblem is counted towards as Old, but files -are show in revtag missing/problem list, and is -impossible to generate diffs with invalid hashes... */ - -assert( STATUS_COUNT_MISMATCH || ! STATUS_COUNT_MISMATCH ); - /* # LOOSE_SKIP_REVCHECK Consider the output of: git show f80105b4fc1196bd8d5fecb98d686b580b1ff65d