diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..8903550ba --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,11 @@ +# The following volunteers have self-identified as subject matter experts +# or interested parties over a particular area of this repository. +# While requesting a review from someone does not obligate that person to +# review a pull request, these reviewers might have valuable knowledge of +# the problem area and could aid in deciding whether a pull request is ready +# for merging. +# +# For more information, see the GitHub CODEOWNERS documentation: +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners + +/scripts/translation/ @alfsb diff --git a/README.md b/README.md index e2f0d5925..a6af2b5cf 100644 --- a/README.md +++ b/README.md @@ -143,3 +143,8 @@ and find issues with it, they are located in the `scripts/qa/` directory. There might be some more just in `scripts/` but they need to be checked if they are still relevant and/or given some love. +# Translation Tools + +There are also various scripts to ensure the quality and synchrony of +documentation translations, located in the `scripts/translation/` directory. + diff --git a/scripts/translation/.gitignore b/scripts/translation/.gitignore new file mode 100644 index 000000000..08b61f3ee --- /dev/null +++ b/scripts/translation/.gitignore @@ -0,0 +1,2 @@ +# Persistent data shared between scripts +.cache/ diff --git a/scripts/translation/README.md b/scripts/translation/README.md new file mode 100644 index 000000000..7890be067 --- /dev/null +++ b/scripts/translation/README.md @@ -0,0 +1,111 @@ +# Some useful scripts for maintaining translation consistency of the manual + +Some of these scripts only test some file contents or XML structure +of translated files against their equivalents in the `en/` directory. +Others will try to modify the translations in place, changing the +translated files. Use with care. 
+ +Not all translations are identical, or use the same conventions. +So not all scripts will be of use for all translations. The +assumptions of each script are described in each file. + +The `lib/` directory contains common code and functionality +across these scripts. + +Before using the scripts, they need to be configured: +``` +php doc-base/scripts/translation/configure.php $LANG_DIR +``` + +## qarvt.php + +`qarvt.php` checks if all translated files have revtags in the +expected format. + +## qaxml.a.php + +`qaxml.a.php` checks if all updated translated files have +the same tag-attribute-value triples. Tag attributes are extensively +utilized in the manual for linking and XIncluding. Translated files with +missing or mistyped attributes may cause build failures or missing +parts not copied by XIncludes. + +## qaxml.e.php + +`qaxml.e.php` checks if all updated translated files have +the same external entities as the original files. Unbalanced entities +may indicate mistyped or wrongly translated parts. + +## qaxml.p.php + +`qaxml.p.php` checks if all updated translated files have +the same processing instructions as the original files. Unbalanced processing +instructions may cause compilation errors, as they are utilized by the manual's build +process. + +## qaxml.t.php + +`qaxml.t.php` checks if all updated translated files have +the same tags as the original files. Different number of tags between +source texts and target translations may cause compilation errors. + +Usage: `php qaxml.t.php [--detail] [tag[,tag]]` + +`[tag[,tag]]` is a comma separated tag list to check their +contents, as some tag's contents are expected *not* to be translated. + +`--detail` will also print line definitions of each mismatched tag, +to facilitate bisecting. 
+ +## Suggested execution + +Structural checks: + +``` +php doc-base/scripts/translation/configure.php $LANG_DIR + +php doc-base/scripts/translation/qarvt.php + +php doc-base/scripts/translation/qaxml.a.php +php doc-base/scripts/translation/qaxml.e.php +php doc-base/scripts/translation/qaxml.p.php +php doc-base/scripts/translation/qaxml.t.php +``` +Tags where no translations are expected: +``` +php doc-base/scripts/translation/qaxml.t.php acronym +php doc-base/scripts/translation/qaxml.t.php classname +php doc-base/scripts/translation/qaxml.t.php constant +php doc-base/scripts/translation/qaxml.t.php envar +php doc-base/scripts/translation/qaxml.t.php function +php doc-base/scripts/translation/qaxml.t.php interfacename +php doc-base/scripts/translation/qaxml.t.php parameter +php doc-base/scripts/translation/qaxml.t.php type +php doc-base/scripts/translation/qaxml.t.php classsynopsis +php doc-base/scripts/translation/qaxml.t.php constructorsynopsis +php doc-base/scripts/translation/qaxml.t.php destructorsynopsis +php doc-base/scripts/translation/qaxml.t.php fieldsynopsis +php doc-base/scripts/translation/qaxml.t.php funcsynopsis +php doc-base/scripts/translation/qaxml.t.php methodsynopsis +``` +Tags where few translations are expected: +``` +php doc-base/scripts/translation/qaxml.t.php code +php doc-base/scripts/translation/qaxml.t.php computeroutput +php doc-base/scripts/translation/qaxml.t.php filename +php doc-base/scripts/translation/qaxml.t.php literal +php doc-base/scripts/translation/qaxml.t.php varname +``` + +# Migration + +## Maintainers with spaces + +The regex on `RevtagParser` was narrowed to not accept maintainer names +with spaces. This needs to be confirmed on all active translations, or +the regex modified to accept spaces again. + +## en/chmonly + +`en/chmonly` is ignored on revcheck, but it appears translatable. If it's an +`en/` only directory, this should be uncommented on RevcheckIgnore. 
/**
 * Persists a single PHP value as a gzip-compressed, serialized cache file.
 *
 * Names starting with '/' are used verbatim; any other name is resolved
 * below CACHE_DIR.
 */
class CacheFile
{
    const CACHE_DIR = __DIR__ . '/../.cache';

    private string $filename;

    function __construct( string $file )
    {
        $this->filename = CacheFile::prepareFilename( $file , true );
    }

    /**
     * Read the cached value back.
     *
     * @param mixed $init Value returned when the cache file is missing,
     *                    unreadable or not valid gzip data.
     */
    public function load( mixed $init = null )
    {
        if ( ! file_exists( $this->filename ) )
            return $init;

        $data = file_get_contents( $this->filename );
        if ( $data === false )
            return $init;

        // A truncated or foreign file must not be handed to unserialize().
        $text = gzdecode( $data );
        if ( $text === false )
            return $init;

        // NOTE(review): unserialize() is only safe on files written by save().
        return unserialize( $text );
    }

    /**
     * Serialize, compress and write $data, replacing any previous contents.
     */
    public function save( $data )
    {
        $contents = gzencode( serialize( $data ) );
        file_put_contents( $this->filename , $contents );
    }

    /**
     * Resolve a cache name to a path.
     *
     * @param string $file            Absolute path, or a name relative to CACHE_DIR.
     * @param bool   $createCacheDirs Create the containing directory when missing
     *                                (relative names only).
     */
    public static function prepareFilename( string $file , bool $createCacheDirs = false )
    {
        if ( str_starts_with( $file , '/' ) )
            return $file;

        // Build the full path first and derive the directory from it, so a
        // nested name such as "a/b.dat" ends up in CACHE_DIR/a/b.dat and
        // the directory that gets created is the one the file lives in.
        $outFile = rtrim( self::CACHE_DIR , '/' ) . '/' . $file;
        $outPath = dirname( $outFile );
        if ( $createCacheDirs && ! file_exists( $outPath ) )
            mkdir( $outPath , 0777 , true );
        return $outFile;
    }
}

/**
 * Static helpers to load and save plain serialized files below CACHE_DIR.
 */
class CacheUtil
{
    const CACHE_DIR = __DIR__ . '/../.cache';

    /**
     * Return the unserialized contents of $path/$file, or null when the
     * file does not exist.
     */
    public static function load( string $path , string $file )
    {
        $filename = CacheUtil::prepareFilename( $path , $file , true );
        if ( ! file_exists( $filename ) )
            return null;
        $data = file_get_contents( $filename );
        return unserialize( $data );
    }

    /**
     * Serialize $data into $path/$file, creating directories as needed.
     */
    public static function save( string $path , string $file , $data )
    {
        $outFile = CacheUtil::prepareFilename( $path , $file , true );
        $contents = serialize( $data );
        file_put_contents( $outFile , $contents );
    }

    /**
     * Join CACHE_DIR, $path and $file, optionally creating the directory.
     */
    public static function prepareFilename( string $path , string $file , bool $createDirs = false )
    {
        $baseDir = CacheUtil::CACHE_DIR;
        $outPath = rtrim( $baseDir , '/' ) . '/' . $path;
        $outFile = rtrim( $outPath , '/' ) . '/' . $file;
        if ( $createDirs && ! file_exists( $outPath ) )
            mkdir( $outPath , 0777 , true );
        return $outFile;
    }
}

/**
 * Placeholder for `git diff --numstat` parsing; currently does nothing.
 */
class GitDiffParser
{
    public static function parseNumstatInto( string $dir , RevcheckFileInfo $file )
    {}
}
/**
 * Reads `git log --name-only` of a documentation tree and fills in the
 * head hash, diff hash and commit date of every file present in the list.
 */
class GitLogParser
{
    /**
     * @param string           $lang Directory in which `git log` is executed.
     * @param RevcheckFileList $list Files to complement; entries not in the
     *                               list (deleted, renamed) are skipped.
     *
     * @throws RuntimeException When the git process cannot be started.
     */
    static function parseInto( string $lang , RevcheckFileList & $list )
    {
        $cwd = getcwd();
        chdir( $lang );

        $fp = popen( "git log --name-only" , "r" );
        if ( $fp === false )
        {
            chdir( $cwd );
            throw new RuntimeException( "Unable to run git log in {$lang}." );
        }

        $hash = "";
        $date = 0;      // int, because it is copied into RevcheckFileInfo::$date
        $skip = false;

        try
        {
            while ( ( $line = fgets( $fp ) ) !== false )
            {
                // new commit block
                if ( str_starts_with( $line , "commit " ) )
                {
                    $hash = trim( substr( $line , 7 ) );
                    $date = 0;
                    $skip = false;
                    continue;
                }
                // datetime of commit
                if ( str_starts_with( $line , 'Date:' ) )
                {
                    $date = (int) strtotime( trim( substr( $line , 5 ) ) );
                    continue;
                }
                // other headers
                if ( strpos( $line , ': ' ) > 0 )
                    continue;
                // empty lines
                if ( trim( $line ) == "" )
                    continue;
                // commit message
                if ( str_starts_with( $line , ' ' ) )
                {
                    // commits with this mark are ignored
                    if ( stristr( $line, '[skip-revcheck]' ) !== false )
                        $skip = true;
                    continue;
                }

                // otherwise, a filename
                $info = $list->get( trim( $line ) );

                // untracked file (deleted, renamed)
                if ( $info == null )
                    continue;

                // the head commit: newest commit touching the file
                if ( $info->head == "" )
                {
                    $info->head = $hash;
                    $info->date = $date;
                }

                // after, only tracks non skipped commits
                if ( $skip )
                    continue;

                // the diff commit: newest non-skipped commit touching the file
                if ( $info->diff == "" )
                {
                    $info->diff = $hash;
                    $info->date = $date;
                }
            }
        }
        finally
        {
            // Always release the pipe and restore the caller's directory.
            pclose( $fp );
            chdir( $cwd );
        }
    }
}
/**
 * Consumes the ignore-related command line switches shared by the qaxml
 * scripts and exposes what is left of the command line.
 *
 * --add-ignore=MARK and --del-ignore=MARK update the ignore file and end
 * the script; --disable-ignore is removed from $argv and recorded in
 * $showIgnore.
 */
class OutputIgnoreArgv
{
    public string $command = "";
    public string $options = "";
    public bool $showIgnore = true;

    function __construct( array & $argv )
    {
        $this->command = $argv[0];

        foreach ( $argv as $index => $param )
        {
            if ( str_starts_with( $param , "--add-ignore=" ) )
                $this->addIgnoreAndExit( substr( $param , 13 ) );

            if ( str_starts_with( $param , "--del-ignore=" ) )
                $this->delIgnoreAndExit( substr( $param , 13 ) );

            if ( $param == "--disable-ignore" )
            {
                $this->showIgnore = false;
                unset( $argv[$index] );
            }
        }

        // Everything after the command name, joined by single spaces.
        $rest = $argv;
        array_shift( $rest );
        $this->options = implode( " " , $rest );
    }

    /** Store $mark in the ignore file unless already present, then exit. */
    private function addIgnoreAndExit( string $mark )
    {
        $store = OutputIgnoreArgv::cacheFile();
        $marks = $store->load( array() );
        if ( in_array( $mark , $marks ) == false )
        {
            $marks[] = $mark;
            $store->save( $marks );
        }
        exit;
    }

    /** Remove every occurrence of $mark from the ignore file, then exit. */
    private function delIgnoreAndExit( string $mark )
    {
        $store = OutputIgnoreArgv::cacheFile();
        $marks = $store->load( array() );
        $removed = 0;
        while ( ( $pos = array_search( $mark , $marks ) ) !== false )
        {
            unset( $marks[$pos] );
            $removed++;
        }
        if ( $removed == 0 )
            print "Ignore mark not found.\n";
        else
            $store->save( $marks );
        exit;
    }

    /** Ignore file kept in the current working directory. */
    public static function cacheFile()
    {
        return new CacheFile( getcwd() . "/.qaxml.ignore" );
    }

    /** Append the command line that would add $mark to the ignore file. */
    function pushAddIgnore( OutputIgnoreBuffer $output, string $mark )
    {
        $output->add( " php {$this->command} --add-ignore=$mark\n" );
    }

    /** Append the command line that would remove $mark from the ignore file. */
    function pushDelIgnore( OutputIgnoreBuffer $output, string $mark )
    {
        $output->add( " php {$this->command} --del-ignore=$mark\n" );
    }
}
/**
 * Collects the output for one file and prints it unless the resulting
 * text was marked to be ignored.
 *
 * An ignore mark is "<filename>:<hash of header+options>:<hash including
 * the collected texts>".
 */
class OutputIgnoreBuffer
{
    public int $printCount = 0;

    private string $header = "";
    private string $filename = "";
    private array $texts = array();

    private OutputIgnoreArgv $args;

    function __construct( OutputIgnoreArgv $args , string $header , string $filename )
    {
        $this->args = $args;
        $this->header = $header;
        $this->filename = $filename;
    }

    /** Append a raw piece of text. */
    function add( string $text )
    {
        $this->texts[] = $text;
    }

    /**
     * Append a line describing a count mismatch; equal counts add nothing.
     * "+ " means only in source, "- " only in target, "* " differing counts.
     */
    function addDiff( string $text , int $sourceCount , int $targetCount )
    {
        if ( $sourceCount == $targetCount )
            return;

        if ( $targetCount == 0 )
        {
            $lead = "+ ";
            $tail = $sourceCount == 1 ? "" : " +{$sourceCount}";
        }
        elseif ( $sourceCount == 0 )
        {
            $lead = "- ";
            $tail = $targetCount == 1 ? "" : " -{$targetCount}";
        }
        else
        {
            $lead = "* ";
            $tail = " -{$targetCount} +{$sourceCount}";
        }

        $this->add( "{$lead}{$text}{$tail}\n" );
    }

    /** Append a blank line unless the buffer is empty or already ends with one. */
    function addLine()
    {
        if ( count( $this->texts ) == 0 )
            return;
        if ( end( $this->texts ) != "\n" )
            $this->add( "\n" );
    }

    /**
     * Print header and collected texts, honouring ignore marks when
     * ignoring is enabled. Counts every non-empty buffer in $printCount.
     */
    function print()
    {
        if ( count( $this->texts ) == 0 )
            return;

        $this->addLine();
        $this->printCount++;

        $head = $this->filename . ':' . $this->hash( false ) . ':';
        $current = $head . $this->hash( true );
        $marks = OutputIgnoreArgv::cacheFile()->load( array() );

        if ( $this->args->showIgnore )
        {
            if ( in_array( $current , $marks ) )
                $this->texts = array();
            else
                $this->args->pushAddIgnore( $this , $current );

            // old marks: same file and options, but different contents
            while ( ( $pos = array_search( $current , $marks ) ) !== false )
                unset( $marks[$pos] );

            foreach ( $marks as $old )
            {
                if ( $old == null )
                    continue;
                if ( str_starts_with( $old , $head ) )
                    $this->args->pushDelIgnore( $this , $old );
            }
        }

        $this->addLine();

        if ( count( $this->texts ) == 0 )
            return;

        print $this->header;
        foreach ( $this->texts as $piece )
            print $piece;
    }

    /** crc32b of header and options (plus texts), ignoring all whitespace. */
    private function hash( bool $withContents ) : string
    {
        $text = $this->header . $this->args->options;
        if ( $withContents )
            $text .= implode( "" , $this->texts );
        $text = str_replace( array( " " , "\n" , "\r" , "\t" ) , "" , $text );
        return hash( "crc32b" , $text );
    }
}
/**
 * One source/target file pair handed from the revcheck run to the QA
 * scripts through the cache.
 */
class QaFileInfo
{
    public string $sourceHash;
    public string $targetHash;
    public string $sourceDir;
    public string $targetDir;
    public string $file;
    public int $days;

    function __construct( string $sourceHash , string $targetHash , string $sourceDir , string $targetDir , string $file , int $days )
    {
        $this->sourceHash = $sourceHash;
        $this->targetHash = $targetHash;
        $this->sourceDir = $sourceDir;
        $this->targetDir = $targetDir;
        $this->file = $file;
        $this->days = $days;
    }

    /**
     * Load the list written by cacheSave().
     *
     * @return array The cached items, or an empty array when nothing was
     *               cached yet (CacheUtil::load() returns null then, which
     *               would violate the declared return type).
     */
    public static function cacheLoad() :array
    {
        return CacheUtil::load( "" , "QaFileInfo.phps" ) ?? array();
    }

    /**
     * Write the list twice: as PHP serialize() data, read by cacheLoad(),
     * and as CSV with one line per item.
     */
    public static function cacheSave( array $itens )
    {
        // PHP serialize()
        CacheUtil::save( "" , "QaFileInfo.phps" , $itens );

        // CSV
        $filename = CacheUtil::prepareFilename( "" , "QaFileInfo.csv" , true );
        $fp = fopen( $filename , 'w' );
        foreach( $itens as $item )
        {
            $line = array( $item->sourceHash , $item->targetHash , $item->sourceDir , $item->targetDir , $item->file , $item->days );
            fputcsv( $fp , $line );
        }
        fclose($fp);
    }
}
/** Translation status of a file, as computed by the revcheck run. */
enum RevcheckStatus :string
{
    case Untranslated = 'Untranslated';
    case RevTagProblem = 'RevTagProblem';
    case TranslatedOk = 'TranslatedOk';
    case TranslatedOld = 'TranslatedOld';
    case TranslatedWip = 'TranslatedWip';
    case NotInEnTree = 'NotInEnTree';
}

/**
 * General data of one file in a documentation tree.
 */
class RevcheckFileInfo
{
    public string $file = ""; // from fs
    public int    $size = 0 ; // from fs
    public string $head = ""; // from vcs, source only, head hash, may be skipped
    public string $diff = ""; // from vcs, source only, diff hash, no skips
    public int    $date = 0 ; // from vcs, source only, date of head or diff commit
    public int    $days = 0 ; // age in days; declared so assigning it is not a dynamic property

    public RevcheckStatus $status; // target only
    public RevtagInfo|null $revtag; // target only

    function __construct( string $file , int $size )
    {
        $this->file = $file;
        $this->size = $size;
        $this->head = "";
        $this->diff = "";
        $this->date = 0;
        $this->days = 0;
        $this->status = RevcheckStatus::Untranslated;
        $this->revtag = null;
    }
}

/**
 * Walks a documentation tree and keeps a RevcheckFileInfo for every file
 * that is not ignored, keyed by its path relative to the tree root.
 */
class RevcheckFileList
{
    private $list = array();

    function __construct( $lang )
    {
        $this->loadTree( $lang );
    }

    /** Return the entry for a relative path, or null when not listed. */
    function get( $file ): RevcheckFileInfo|null
    {
        return $this->list[ $file ] ?? null;
    }

    /** Iterate over all entries in listing order. */
    function iterator(): Iterator
    {
        return new ArrayIterator( $this->list );
    }

    /**
     * Load the whole tree rooted at $lang. Ends the script when $lang is
     * not a directory.
     */
    function loadTree( $lang )
    {
        // DirectoryIterator throws instead of returning false, so the
        // directory has to be checked up front.
        if ( ! is_dir( $lang ) )
            die( "$lang is not a directory.\n" );

        $cwd = getcwd();
        chdir( $lang );
        try
        {
            $this->loadTreeRecurse( $lang , "" );
        }
        finally
        {
            chdir( $cwd );
        }
    }

    /**
     * List one directory (relative to the current working directory),
     * then descend into its subdirectories in sorted order.
     */
    function loadTreeRecurse( $lang , $path )
    {
        $here = $path == "" ? "." : $path;
        if ( ! is_dir( $here ) )
            die( "$path is not a directory.\n" );

        $todoDirs = [];

        foreach( new DirectoryIterator( $here ) as $entry )
        {
            $name = $entry->getFilename();
            // dot entries: ".", ".." and hidden files or directories
            if ( $name[0] == '.' )
                continue;

            $key = ltrim( $path . '/' . $name , '/' );
            if ( $entry->isDir() )
            {
                $todoDirs[] = $key;
                continue;
            }

            if ( RevcheckIgnore::ignore( $key ) )
                continue;
            $this->list[ $key ] = new RevcheckFileInfo( $key , $entry->getSize() );
        }

        sort( $todoDirs );
        foreach( $todoDirs as $subDir )
            $this->loadTreeRecurse( $lang , $subDir );
    }
}

/**
 * Decides which files of a documentation tree are left out of revcheck.
 */
class RevcheckIgnore
{
    /**
     * @param string $filename Path relative to the tree root.
     * @return bool True when the file must be ignored.
     */
    public static function ignore( $filename ) : bool
    {
        // Ignore dot files

        if ( $filename[0] == '.' )
            return true;

        // Ignore files other than xml assets

        if ( ( str_ends_with( $filename , '.xml' ) || str_ends_with( $filename , '.ent' ) ) == false )
            return true;

        // Ignore autogenerated files

        if ( str_starts_with( $filename , "entities." ) )
            return true;
        if ( str_contains( $filename , "/entities." ) )
            return true;
        if ( str_contains( $filename , "/versions.xml" ) )
            return true;

        // Only in English

        $onlyInEnglish = array(
            "contributors.ent" ,
            "contributors.xml" ,
            "appendices/license.xml" ,
            "appendices/extensions.xml" ,
            "appendices/reserved.constants.xml" ,
            "reference/datetime/timezones.xml" ,
        );
        if ( in_array( $filename , $onlyInEnglish , true ) )
            return true;
        if ( str_starts_with( $filename , 'chmonly/') )
            return true;
        if ( str_ends_with( $filename , '/book.developer.xml') )
            return true;

        // Only in translations

        if ( $filename == "translation.xml" )
            return true;

        // At least, do not ignore
        return false;
    }
}
/**
 * Compares a source tree against a translation tree and sorts every file
 * into one of the status lists below.
 */
class RevcheckRun
{
    public string $sourceDir;
    public string $targetDir;
    public RevcheckFileList $sourceFiles;
    public RevcheckFileList $targetFiles;

    // Separated lists
    public array $filesOk = [];
    public array $filesOld = [];
    public array $filesRevtagProblem = [];
    public array $filesUntranslated = [];
    public array $filesNotInEn = [];
    public array $filesWip = [];
    public array $qaList = [];

    /**
     * @param bool $writeResults When true, the QA list is written to the cache.
     */
    function __construct( string $sourceDir , string $targetDir , bool $writeResults = true )
    {
        $this->sourceDir = $sourceDir;
        $this->targetDir = $targetDir;

        // load respective file tree
        $this->sourceFiles = new RevcheckFileList( $sourceDir );
        $this->targetFiles = new RevcheckFileList( $targetDir );

        // original files get info from version control
        GitLogParser::parseInto( $sourceDir , $this->sourceFiles );

        // translated files get info from file contents
        RevtagParser::parseInto( $targetDir , $this->targetFiles );

        // match and mix
        $this->calculateStatus();

        if ( $writeResults )
            QaFileInfo::cacheSave( $this->qaList );
    }

    /**
     * All statuses are marked on source files, except NotInEnTree, which
     * is marked on target files.
     */
    private function calculateStatus()
    {
        foreach( $this->sourceFiles->iterator() as $source )
            $this->classifySource( $source , $this->targetFiles->get( $source->file ) );

        // NotInEnTree

        foreach( $this->targetFiles->iterator() as $target )
        {
            if ( $this->sourceFiles->get( $target->file ) != null )
                continue;
            $target->status = RevcheckStatus::NotInEnTree;
            $this->filesNotInEn[] = $target;
        }
    }

    /** Decide the status of one source file given its translation, if any. */
    private function classifySource( RevcheckFileInfo $source , RevcheckFileInfo|null $target )
    {
        // Untranslated

        if ( $target == null )
        {
            $source->status = RevcheckStatus::Untranslated;
            $this->filesUntranslated[] = $source;
            return;
        }

        // RevTagProblem

        if ( $target->revtag == null || strlen( $target->revtag->revision ) != 40 )
        {
            $source->status = RevcheckStatus::RevTagProblem;
            $this->filesRevtagProblem[] = $source;
            return;
        }

        // Translation compares ok from multiple hashs. The head hash or the last non-skiped hash.
        // See https://github.com/php/doc-base/blob/090ff07aa03c3e4ad7320a4ace9ffb6d5ede722f/scripts/revcheck.php#L374
        // and https://github.com/php/doc-base/blob/090ff07aa03c3e4ad7320a4ace9ffb6d5ede722f/scripts/revcheck.php#L392 .

        $targetHash = $target->revtag->revision;
        $sourceHash = $targetHash == $source->diff ? $source->diff : $source->head;

        $daysOld = (int) ( ( time() - $source->date ) / 86400 );

        $this->qaList[ $source->file ] = new QaFileInfo( $sourceHash , $targetHash , $this->sourceDir , $this->targetDir , $source->file , $daysOld );

        $isReady = $target->revtag->status == "ready";

        // TranslatedOk

        if ( $isReady && $sourceHash == $targetHash )
        {
            $source->status = RevcheckStatus::TranslatedOk;
            $this->filesOk[] = $source;
            return;
        }

        GitDiffParser::parseNumstatInto( $this->sourceDir , $source );

        // TranslatedWip

        if ( ! $isReady )
        {
            $source->status = RevcheckStatus::TranslatedWip;
            $this->filesWip[] = $source;
            return;
        }

        // TranslatedOld

        $source->days = $daysOld;
        $source->status = RevcheckStatus::TranslatedOld;
        $this->filesOld[] = $source;
    }
}
/** Revision tag and credits found in the XML comments of one file. */
class RevtagInfo
{
    public string $revision = "";
    public string $maintainer = "";
    public string $status = "";
    public string $credits = "";
    public array  $errors = [];
}

/**
 * Extracts "EN-Revision: ... Maintainer: ... Status: ..." and "CREDITS:"
 * data from the XML comments of translated files.
 */
class RevtagParser
{
    /** Parse every file of $list below $lang and store the result in its entry. */
    static function parseInto( string $lang , RevcheckFileList & $list )
    {
        foreach( $list->iterator() as $entry )
            $entry->revtag = RevtagParser::parseFile( $lang . '/' . $entry->file );
    }

    /** Parse the revtag of a file on disk. */
    public static function parseFile( string $filename ): RevtagInfo|null
    {
        $doc = XmlUtil::loadFile( $filename );
        $ret = new RevtagInfo;
        RevtagParser::parseNodeRecurse( $doc , $ret , $filename );
        return $ret;
    }

    /** Parse the revtag of XML given as text. */
    public static function parseText( string $contents ): RevtagInfo|null
    {
        $doc = XmlUtil::loadText( $contents );
        $ret = new RevtagInfo;
        RevtagParser::parseNodeRecurse( $doc , $ret );
        return $ret;
    }

    /** Visit $node and all its descendants, handing comments to parseComment(). */
    public static function parseNodeRecurse( DOMNode $node , RevtagInfo $ret , $filename = "" )
    {
        if ( $node->nodeType == XML_COMMENT_NODE )
            RevtagParser::parseComment( $node , $ret , $filename );

        foreach( $node->childNodes as $child )
            RevtagParser::parseNodeRecurse( $child , $ret , $filename );
    }

    /**
     * Fill $ret from one comment node. Comments not starting with "EN-"
     * or "CREDITS:" are left alone; problems are appended to $ret->errors.
     */
    public static function parseComment( DOMNode $node , RevtagInfo $ret , $filename = "" )
    {
        $text = trim( $node->textContent );

        if ( str_starts_with( $text , "EN-" ) )
        {
            // /EN-Revision:\s*(\S+)\s*Maintainer:\s*(\S+)\s*Status:\s*(\S+)/   // restrict maintainer without spaces
            // /EN-Revision:\s*(\S+)\s*Maintainer:\s(.*?)\sStatus:\s*(\S+)/     // accepts maintainer with spaces

            $match = array();
            $regex = "/EN-Revision:\s*(\S+)\s*Maintainer:\s*(\S+)\s*Status:\s*(\S+)/";
            if ( preg_match( $regex , $text , $match ) )
            {
                $ret->revision = trim( $match[1] );
                $ret->maintainer = trim( $match[2] );
                $ret->status = trim( $match[3] );

                if ( $ret->revision != "" && strlen( $ret->revision ) != 40 )
                    $ret->errors[] = "Wrong hash size: {$ret->revision}";
                if ( $ret->maintainer == "" )
                    $ret->errors[] = "Empty maintainer.";
                if ( $ret->status == "" )
                    $ret->errors[] = "Empty status.";
            }
            else
                $ret->errors[] = "No revtag.";
        }

        if ( str_starts_with( $text , "CREDITS:" ) )
        {
            $match = array();
            $regex = "/CREDITS:(.*)/";
            if ( preg_match( $regex , $text , $match ) )
            {
                $ret->credits = trim( $match[1] );

                if ( $ret->credits == "" )
                    $ret->errors[] = "Empty credits.";
            }
        }
    }
}

/**
 * Helpers to load XML leniently and to inspect the resulting DOM.
 */
class XmlUtil
{
    /**
     * Return the names of the entities libxml reports as undefined while
     * loading $filename without resolving externals.
     */
    public static function extractEntities( $filename )
    {
        $was = libxml_use_internal_errors( true );

        $doc = new DOMDocument();
        $doc->recover = true;
        $doc->resolveExternals = false;
        $doc->load( $filename );

        $errors = libxml_get_errors();
        libxml_clear_errors();
        libxml_use_internal_errors( $was );

        $ret = array();
        foreach ($errors as $error)
        {
            if ( preg_match( "/Entity '(\S+)' not defined/" , $error->message , $matches ) )
                $ret[] = $matches[1];
        }
        return $ret;
    }

    /** Return $node and all its descendants whose nodeType equals $type. */
    public static function listNodeType( DOMNode $node , int $type )
    {
        $ret = array();
        XmlUtil::listNodeTypeRecurse( $node , $type , $ret );
        return $ret;
    }

    /** Recursive worker of listNodeType(); appends matches to $ret. */
    public static function listNodeTypeRecurse( DOMNode $node , int $type, array & $ret )
    {
        if ( $node->nodeType == $type )
            $ret[] = $node;
        foreach( $node->childNodes as $child )
            XmlUtil::listNodeTypeRecurse( $child , $type, $ret );
    }

    /**
     * Load a file as a DOM document. An unreadable file yields an empty
     * document; PHP still emits the file_get_contents() warning.
     */
    public static function loadFile( $filename ):DOMDocument
    {
        $contents = file_get_contents( $filename );
        return XmlUtil::loadText( $contents === false ? "" : $contents );
    }

    /**
     * Parse XML text in recover mode, without resolving externals or
     * substituting entities. Parse errors are discarded.
     */
    public static function loadText( $contents ):DOMDocument
    {
        $doc = new DOMDocument();
        $doc->preserveWhiteSpace = true;
        $doc->recover = true;
        $doc->resolveExternals = false;
        $doc->substituteEntities = false;

        // loadXML() throws a ValueError on empty input, so an empty file
        // is represented by an empty document instead.
        if ( (string) $contents === "" )
            return $doc;

        $was = libxml_use_internal_errors( true );
        $doc->loadXML( $contents );
        libxml_clear_errors();
        libxml_use_internal_errors( $was );

        return $doc;
    }
}
'/lib/all.php'; + +$langDir = ""; + +switch ( count( $argv ) ) +{ + case 1: + break; + case 2: + $langDir = $argv[1]; + break; + default: + print_usage_exit($argv[0]); + return; +} + +if ( $langDir == "" ) +{ + $qalist = QaFileInfo::cacheLoad(); + if ( count( $qalist ) > 0 ) + { + foreach( $qalist as $qa ) + { + $langDir = $qa->targetDir; + break; + } + } + else + print_usage_exit($argv[0]); +} + +$list = new RevcheckFileList( $langDir ); + +foreach( $list->list as $item ) +{ + $file = $langDir . '/' . $item->file; + $revt = RevtagParser::parseFile( $file ); + + if ( count( $revt->errors ) == 0 ) + continue; + + print "qarvt: $file\n"; + foreach( $revt->errors as $error ) + print " $error\n"; + print "\n"; +} + +function print_usage_exit($cmd) +{ + fwrite( STDERR , " Wrong paramater count. Usage:\n" ); + fwrite( STDERR , " {$cmd} [lang_dir]:\n" ); + exit; +} diff --git a/scripts/translation/qaxml.a.php b/scripts/translation/qaxml.a.php new file mode 100644 index 000000000..6662b580d --- /dev/null +++ b/scripts/translation/qaxml.a.php @@ -0,0 +1,84 @@ + | + * +----------------------------------------------------------------------+ + * | Description: Compare attributes between XMLs. | + * +----------------------------------------------------------------------+ + */ + +require_once __DIR__ . '/lib/all.php'; + +$qalist = QaFileInfo::cacheLoad(); + +foreach ( $qalist as $qafile ) +{ + if ( $qafile->file == "bookinfo.xml" ) + continue; + if ( $qafile->sourceHash != $qafile->targetHash ) + continue; + + $source = $qafile->sourceDir . '/' . $qafile->file; + $target = $qafile->targetDir . '/' . 
$qafile->file; + + $s = XmlUtil::loadFile( $source ); + $t = XmlUtil::loadFile( $target ); + + $s = XmlUtil::listNodeType( $s , XML_ELEMENT_NODE ); + $t = XmlUtil::listNodeType( $t , XML_ELEMENT_NODE ); + + $s = extractTriple( $s ); + $t = extractTriple( $t ); + + if ( implode( "\n" , $s ) == implode( "\n" , $t ) ) + continue; + + $header = true; + $match = array(); + + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + { + if ( $v[0] == $v[1] ) + continue; + + if ( $header ) + { + print "qaxml.a: {$target}\n\n"; + $header = false; + } + + print "* {$k} -{$v[1]} +{$v[0]}\n"; + } + + if ( ! $header ) + print "\n"; +} + +function extractTriple( array $list ) +{ + $ret = array(); + foreach( $list as $elem ) + foreach( $elem->attributes as $attrib ) + $ret[] = "{$elem->nodeName} {$attrib->nodeName} {$attrib->nodeValue}"; + return $ret; +} diff --git a/scripts/translation/qaxml.e.php b/scripts/translation/qaxml.e.php new file mode 100644 index 000000000..6d09cb1b1 --- /dev/null +++ b/scripts/translation/qaxml.e.php @@ -0,0 +1,65 @@ + | + * +----------------------------------------------------------------------+ + * | Description: Compare entities usage between XMLs. | + * +----------------------------------------------------------------------+ + */ + +require_once __DIR__ . '/lib/all.php'; + +$qalist = QaFileInfo::cacheLoad(); +$outarg = new OutputIgnoreArgv( $argv ); + +foreach ( $qalist as $qafile ) +{ + if ( $qafile->file == "bookinfo.xml" ) + continue; + if ( $qafile->sourceHash != $qafile->targetHash ) + continue; + + $source = $qafile->sourceDir . '/' . $qafile->file; + $target = $qafile->targetDir . '/' . 
$qafile->file; + + $s = XmlUtil::extractEntities( $source ); + $t = XmlUtil::extractEntities( $target ); + + if ( implode( "\n" , $s ) == implode( "\n" , $t ) ) + continue; + + $output = new OutputIgnoreBuffer( $outarg , "qaxml.e: {$target}\n\n" , $target ); + + $match = array(); + + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + { + if ( $v[0] == $v[1] ) + continue; + + $output->add( "* &{$k}; -{$v[1]} +{$v[0]}\n" ); + } + + $output->addLine(); + $output->print(); +} diff --git a/scripts/translation/qaxml.p.php b/scripts/translation/qaxml.p.php new file mode 100644 index 000000000..95cca01d2 --- /dev/null +++ b/scripts/translation/qaxml.p.php @@ -0,0 +1,83 @@ + | + * +----------------------------------------------------------------------+ + * | Description: Compare PIs usage between XMLs. | + * +----------------------------------------------------------------------+ + */ + +require_once __DIR__ . '/lib/all.php'; + +$qalist = QaFileInfo::cacheLoad(); + +foreach ( $qalist as $qafile ) +{ + if ( $qafile->file == "bookinfo.xml" ) + continue; + if ( $qafile->sourceHash != $qafile->targetHash ) + continue; + + $source = $qafile->sourceDir . '/' . $qafile->file; + $target = $qafile->targetDir . '/' . 
$qafile->file; + + $s = XmlUtil::loadFile( $source ); + $t = XmlUtil::loadFile( $target ); + + $s = XmlUtil::listNodeType( $s , XML_PI_NODE ); + $t = XmlUtil::listNodeType( $t , XML_PI_NODE ); + + $s = extractPiData( $s ); + $t = extractPiData( $t ); + + if ( implode( "\n" , $s ) == implode( "\n" , $t ) ) + continue; + + $header = true; + $match = array(); + + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + { + if ( $v[0] == $v[1] ) + continue; + + if ( $header ) + { + print "qaxml.p: {$target}\n\n"; + $header = false; + } + + print "* {$k} -{$v[1]} +{$v[0]}\n"; + } + + if ( ! $header ) + print "\n"; +} + +function extractPiData( array $list ) +{ + $ret = array(); + foreach( $list as $elem ) + $ret[] = "{$elem->target} {$elem->data}"; + return $ret; +} diff --git a/scripts/translation/qaxml.t.php b/scripts/translation/qaxml.t.php new file mode 100644 index 000000000..3917302c9 --- /dev/null +++ b/scripts/translation/qaxml.t.php @@ -0,0 +1,312 @@ + | + * +----------------------------------------------------------------------+ + * | Description: Compare tag count and contents between XMLs. | + * +----------------------------------------------------------------------+ + */ + +require_once __DIR__ . '/lib/all.php'; + +$tags = array(); +$showDetail = false; + +$qalist = QaFileInfo::cacheLoad(); +$outarg = new OutputIgnoreArgv( $argv ); + +array_shift( $argv ); +while ( count( $argv ) > 0 ) +{ + $arg = array_shift( $argv ); + + if ( $arg == "--detail" ) + { + $showDetail = true; + $outarg->showIgnore = false; + continue; + } + + $tags = explode( ',' , $arg ); +} + +foreach ( $qalist as $qafile ) +{ + if ( $qafile->file == "bookinfo.xml" ) + continue; + if ( $qafile->sourceHash != $qafile->targetHash ) + continue; + + $source = $qafile->sourceDir . '/' . 
$qafile->file; + $target = $qafile->targetDir . '/' . $qafile->file; + + $output = new OutputIgnoreBuffer( $outarg , "qaxml.t: {$target}\n\n" , $target ); + + // First check, by tag contents, inner text + + if ( count( $tags ) > 0 && $output->printCount == 0 ) + { + $s = XmlUtil::loadFile( $source ); + $t = XmlUtil::loadFile( $target ); + + $s = XmlUtil::listNodeType( $s , XML_ELEMENT_NODE ); + $t = XmlUtil::listNodeType( $t , XML_ELEMENT_NODE ); + + typesNotCaseSensitive( $s ); + typesNotCaseSensitive( $t ); + + $s = extractTagsInnerText( $s , $tags ); + $t = extractTagsInnerText( $t , $tags ); + + $match = array(); + + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + $output->addDiff( $k , $v[0] , $v[1] ); + + if ( $showDetail ) + foreach( $match as $tag => $v ) + printTagUsageDetail( $source , $target , $tag , $output ); + + $output->print(); + } + + // Second check, by tag contents, inner XML + + if ( count( $tags ) > 0 && $output->printCount == 0 ) + { + $s = XmlUtil::loadFile( $source ); + $t = XmlUtil::loadFile( $target ); + + $s = XmlUtil::listNodeType( $s , XML_ELEMENT_NODE ); + $t = XmlUtil::listNodeType( $t , XML_ELEMENT_NODE ); + + typesNotCaseSensitive( $s ); + typesNotCaseSensitive( $t ); + + $s = extractTagsInnerXmls( $s , $tags ); + $t = extractTagsInnerXmls( $t , $tags ); + + $match = array(); + + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + $output->addDiff( $k , $v[0] , $v[1] ); + + if ( $showDetail ) + foreach( $match as $tag => $v ) + printTagUsageDetail( $source , $target , $tag , $output ); + + $output->print(); + } + + // Last check, simple tag count + + if ( $output->printCount == 
0 ) + { + $s = XmlUtil::loadFile( $source ); + $t = XmlUtil::loadFile( $target ); + + $s = XmlUtil::listNodeType( $s , XML_ELEMENT_NODE ); + $t = XmlUtil::listNodeType( $t , XML_ELEMENT_NODE ); + + typesNotCaseSensitive( $s ); + typesNotCaseSensitive( $t ); + + $s = extractNodeName( $s , $tags ); + $t = extractNodeName( $t , $tags ); + + $match = array(); + + foreach( $t as $v ) + $match[$v] = array( 0 , 0 ); + foreach( $s as $v ) + $match[$v] = array( 0 , 0 ); + + foreach( $s as $v ) + $match[$v][0] += 1; + foreach( $t as $v ) + $match[$v][1] += 1; + + foreach( $match as $k => $v ) + $output->addDiff( $k , $v[0] , $v[1] ); + + if ( $showDetail ) + foreach( $match as $tag => $v ) + printTagUsageDetail( $source , $target , $tag , $output ); + + $output->print(); + } +} + +function extractNodeName( array $list , array $tags ) +{ + $ret = array(); + foreach( $list as $elem ) + if ( in_array( $elem->nodeName , $tags) || count( $tags ) == 0 ) + $ret[] = $elem->nodeName; + return $ret; +} + +function typesNotCaseSensitive( array & $nodes ) +{ + // Types not case-sensitive: https://github.com/php/doc-en/issues/2658 + + if ( $nodes == null ) + return; + + foreach( $nodes as $node ) + { + if ( $node->nodeName == "type" ) + { + $text = trim( strtolower( $node->nodeValue ) ); + switch( $text ) + { + case "array": + case "string": + case "float": + case "bool": + case "null": + $node->nodeValue = $text; + break; + } + } + } +} + +function extractTagsInnerText( array $nodes , array $tags ) +{ + $ret = array(); + foreach( $nodes as $node ) + { + $tag = $node->nodeName; + if ( in_array( $tag , $tags ) == false ) + continue; + $text = $node->textContent; + while( true ) + { + $was = strlen( $text ); + $text = str_replace( "\n" , " " , $text ); + $text = str_replace( "\r" , " " , $text ); + $text = str_replace( " " , " " , $text ); + if ( strlen( $text ) == $was ) + break; + } + $ret[] = $tag . ">" . 
$text; + } + return $ret; +} + +function extractTagsInnerXmls( array $nodes , array $tags ) +{ + $ret = array(); + foreach( $nodes as $node ) + { + $tag = $node->nodeName; + if ( in_array( $tag , $tags ) == false ) + continue; + $text = $node->ownerDocument->saveXML( $node ); + while( true ) + { + $was = strlen( $text ); + $text = str_replace( "\n" , " " , $text ); + $text = str_replace( "\r" , " " , $text ); + $text = str_replace( " " , " " , $text ); + if ( strlen( $text ) == $was ) + break; + } + $ret[] = $text; + } + return $ret; +} + +function printTagUsageDetail( string $source , string $target , string $tag , OutputIgnoreBuffer $output ) +{ + $source = collectTagDefinitions( $source , $tag ); + $target = collectTagDefinitions( $target , $tag ); + if ( count( $source ) == count($target) ) + return; + $output->addLine(); + $s = null; + $t = null; + while ( count( $source ) > 0 || count( $target ) > 0 ) + { + if ( $s == null ) + $s = array_shift( $source ); + if ( $t == null ) + $t = array_shift( $target ); + if ( $s != null && $t != null ) + { + if ( abs( $s - $t ) < 1 ) + { + $output->add( "\t{$tag}\t{$s}\t{$t}\n" ); + $s = null; + $t = null; + continue; + } + if ( $s < $t ) + { + array_unshift( $target , $t ); + $t = null; + } + else + { + array_unshift( $source , $s ); + $s = null; + } + } + if ( $s != null ) + { + $output->add( "\t{$tag}\t{$s}\t-\n" ); + $s = null; + } + if ( $t != null ) + { + $output->add( "\t{$tag}\t-\t{$t}\n" ); + $t = null; + } + } + $output->addLine(); +} + +function collectTagDefinitions( string $file , string $tag ) +{ + $ret = array(); + $text = XmlUtil::loadFile( $file ); + $list = XmlUtil::listNodeType( $text , XML_ELEMENT_NODE ); + foreach( $list as $node ) + { + if ( $node->nodeName != $tag ) + continue; + $ret[] = $node->getLineNo(); + } + return $ret; +}