@@ -22,6 +22,19 @@ class SourceHgWebPlugin extends MantisSourcePlugin {
22
22
const PLUGIN_VERSION = '2.0.1 ' ;
23
23
const FRAMEWORK_VERSION_REQUIRED = '2.0.0 ' ;
24
24
25
+ /**
26
+ * Various PCRE patterns used to parse HgWeb output when retrieving
27
+ * changeset info
28
+ * @see commit_changeset()
29
+ */
30
+ const PATTERN_USER = '(?<id>User) (?<user>[^<>]*)(?(?=(?=<))<(?<email>[^<>]*)>|.*) ' ;
31
+ const PATTERN_DATE = '(?<id>Date) (?<date>\d+) (?<tz>-?\d+) ' ;
32
+ const PATTERN_REVISION = '(?<id>Node ID|Parent) +(?<rev>[0-9a-f]+) ' ;
33
+ const PATTERN_DIFF = 'diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*) ' ;
34
+ const PATTERN_BINARY_FILE = 'Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed ' ;
35
+ # Don't use '/' as pattern delimiter with this one
36
+ const PATTERN_PLUS_MINUS = '\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]* ' ;
37
+
25
38
function register () {
26
39
$ this ->name = plugin_lang_get ( 'title ' );
27
40
$ this ->description = plugin_lang_get ( 'description ' );
@@ -207,7 +220,7 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
207
220
$ t_commit_url = $ this ->uri_base ( $ p_repo ) . 'raw-rev/ ' . $ t_commit_id ;
208
221
$ t_input = url_get ( $ t_commit_url );
209
222
210
- if ( false === $ t_input ) {
223
+ if ( ! $ t_input ) {
211
224
echo "failed. \n" ;
212
225
continue ;
213
226
}
@@ -224,41 +237,69 @@ private function import_commits( $p_repo, $p_uri_base, $p_commit_ids, $p_branch=
224
237
return $ t_changesets ;
225
238
}
226
239
240
+ /**
241
+ * Parse changeset data and store it if it does not exist already.
242
+ * This assumes a standard Mercurial template for raw changesets. Using a
243
+ * customized one may break the parsing logic.
244
+ * @param SourceRepo $p_repo Repository
245
+ * @param string $p_input Raw changeset data
246
+ * @param string $p_branch Branch name to record on the changeset (defaults to an empty string)
247
+ * @return array Two elements: the SourceChangeset object, and the list of parent revisions
248
+ */
227
249
private function commit_changeset ( $ p_repo , $ p_input , $ p_branch ='' ) {
228
- $ t_parents = array ();
229
- $ t_message = array ();
230
-
231
250
$ t_input = explode ( "\n" , $ p_input );
251
+ $ i = 0 ;
232
252
233
- foreach ( $ t_input as $ t_line ) {
234
- if ( strpos ( $ t_line , '# ' ) === 0 ) {
235
- if ( !isset ( $ t_commit ['revision ' ] ) && preg_match ( '@^# Node ID +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
236
- $ t_commit ['revision ' ] = $ t_matches [1 ];
237
- echo 'Processing ' . string_display_line ( $ t_commit [revision] ) . '... ' ;
238
- if ( SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
239
- echo "already exists. \n" ;
240
- return array ( null , array () );
241
- }
242
- } else if ( !isset ( $ t_commit ['author ' ] ) && preg_match ( '@^# User ([^<>]*)(?(?=(?=<))<([^<>]*)>|.*)@ ' , $ t_line , $ t_matches ) ) {
243
- $ t_commit ['author ' ] = trim ($ t_matches [1 ]);
244
- $ t_commit ['author_email ' ] = $ t_matches [2 ];
245
- } else if ( !isset ( $ t_commit ['date ' ] ) && preg_match ( '@^# Date +(\d+) (-?\d+)@ ' , $ t_line , $ t_matches ) ) {
246
- $ t_timestamp_gmt = $ t_matches [1 ] - (int )$ t_matches [2 ];
253
+ # Skip changeset header
254
+ while ( strpos ( $ t_input [$ i ++], '# HG changeset patch ' ) === false );
255
+
256
+ # Process changeset metadata
257
+ $ t_commit = array ();
258
+ $ t_parents = array ();
259
+ static $ s_pattern_metadata = '/^# (?: '
260
+ . self ::PATTERN_USER . '| '
261
+ . self ::PATTERN_DATE . '| '
262
+ . self ::PATTERN_REVISION
263
+ . ')/J ' ;
264
+ while ( true ) {
265
+ $ t_match = preg_match ( $ s_pattern_metadata , $ t_input [$ i ], $ t_metadata );
266
+ if ( $ t_match == false ) {
267
+ # We reached the end of metadata, next line is the commit message
268
+ break ;
269
+ }
270
+ switch ( $ t_metadata ['id ' ] ) {
271
+ case 'User ' :
272
+ $ t_commit ['author ' ] = isset ( $ t_metadata ['user ' ] ) ? trim ( $ t_metadata ['user ' ] ) : '' ;
273
+ $ t_commit ['author_email ' ] = isset ( $ t_metadata ['email ' ] ) ? $ t_metadata ['email ' ] : '' ;
274
+ break ;
275
+ case 'Date ' :
276
+ $ t_timestamp_gmt = $ t_metadata ['date ' ] - (int )$ t_metadata ['tz ' ];
247
277
$ t_commit ['date ' ] = gmdate ( 'Y-m-d H:i:s ' , $ t_timestamp_gmt );
248
- } else if ( !isset ( $ t_commit ['parent ' ] ) && preg_match ( '@^# Parent +([a-f0-9]+)@ ' , $ t_line , $ t_matches ) ) {
249
- $ t_parents [] = $ t_matches [1 ];
250
- $ t_commit ['parent ' ] = $ t_matches [1 ];
251
- }
252
- } else if ( isset ( $ t_commit ['revision ' ] ) ) {
253
- if ( preg_match ( '@^diff @ ' , $ t_line , $ t_matches ) ) {
254
278
break ;
255
- }
256
- $ t_message [] = $ t_line ;
279
+ case 'Node ID ' :
280
+ $ t_commit ['revision ' ] = $ t_metadata ['rev ' ];
281
+ break ;
282
+ case 'Parent ' :
283
+ $ t_parents [] = $ t_commit ['parent ' ] = $ t_metadata ['rev ' ];
284
+ break ;
257
285
}
286
+ $ i ++;
258
287
}
259
288
260
- if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
261
- $ t_commit ['message ' ] = implode ( "\n" , $ t_message );
289
+ if ( !SourceChangeset::exists ( $ p_repo ->id , $ t_commit ['revision ' ] ) ) {
290
+ # Read commit message
291
+ $ t_message = '' ;
292
+ while ( $ i < count ( $ t_input ) ) {
293
+ $ t_match = preg_match (
294
+ '/^ ' . self ::PATTERN_DIFF . '/ ' ,
295
+ $ t_input [$ i ]
296
+ );
297
+ if ( $ t_match ) {
298
+ break ;
299
+ }
300
+ $ t_message .= $ t_input [$ i ++] . "\n" ;
301
+ }
302
+ $ t_commit ['message ' ] = trim ( $ t_message );
262
303
263
304
$ t_changeset = new SourceChangeset ( $ p_repo ->id , $ t_commit ['revision ' ],
264
305
$ p_branch , $ t_commit ['date ' ], $ t_commit ['author ' ],
@@ -268,7 +309,12 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
268
309
269
310
$ t_changeset ->author_email = empty ($ t_commit ['author_email ' ])? '' : $ t_commit ['author_email ' ];
270
311
271
- preg_match_all ('#diff[\s]*-r[\s]([^\s]*)[\s]*-r[\s]([^\s]*)[\s]([^\n]*)\n(Binary file[\s]([^\r\n\t\f\v]*)[\s]has changed|\-{3}[\s](/dev/null)?[^\t]*[^\n]*\n\+{3}[\s](/dev/null)?[^\t]*\t[^\n]*)#u ' , $ p_input , $ t_matches , PREG_SET_ORDER );
312
+ static $ s_pattern_diff = '# '
313
+ . self ::PATTERN_DIFF . '\n( '
314
+ . self ::PATTERN_BINARY_FILE . '| '
315
+ . self ::PATTERN_PLUS_MINUS
316
+ . ')#u ' ;
317
+ preg_match_all ( $ s_pattern_diff , $ p_input , $ t_matches , PREG_SET_ORDER );
272
318
273
319
$ t_commit ['files ' ] = array ();
274
320
@@ -278,21 +324,21 @@ private function commit_changeset( $p_repo, $p_input, $p_branch='' ) {
278
324
$ t_file ['revision ' ] = $ t_commit ['revision ' ];
279
325
280
326
if (!empty ($ t_file_matches [3 ])) {
281
- if (! empty ($ t_file_matches [5 ])) {
282
- $ t_file ['action ' ] = 'bin ' ;
327
+ if ( empty ($ t_file_matches [5 ]) && empty ( $ t_file_matches [ 6 ]) && empty ( $ t_file_matches [ 7 ])) {
328
+ $ t_file ['action ' ] = 'mod ' ;
283
329
}
284
- else if ( " /dev/null " == $ t_file_matches [7 ] ) {
285
- $ t_file ['action ' ] = 'rm ' ;
330
+ else if (! empty ( $ t_file_matches [5 ]) ) {
331
+ $ t_file ['action ' ] = 'bin ' ;
286
332
}
287
333
else if ("/dev/null " == $ t_file_matches [6 ]) {
288
334
$ t_file ['action ' ] = 'add ' ;
289
335
}
336
+ else if ("/dev/null " == $ t_file_matches [7 ]) {
337
+ $ t_file ['action ' ] = 'rm ' ;
338
+ }
290
339
else if ("/dev/null " == $ t_file_matches [7 ] && "/dev/null " == $ t_file_matches [6 ]) {
291
340
$ t_file ['action ' ] = 'n/a ' ;
292
341
}
293
- else if (empty ($ t_file_matches [5 ]) && empty ($ t_file_matches [6 ]) && empty ($ t_file_matches [7 ])) {
294
- $ t_file ['action ' ] = 'mod ' ;
295
- }
296
342
}
297
343
$ t_commit ['files ' ][] = $ t_file ;
298
344
}
0 commit comments