@@ -8,7 +8,11 @@ import (
8
8
"io/ioutil"
9
9
"os"
10
10
"path/filepath"
11
+ "runtime"
12
+ "strconv"
11
13
"strings"
14
+ "sync"
15
+ "sync/atomic"
12
16
"time"
13
17
14
18
opentracing "github.com/opentracing/opentracing-go"
49
53
errInvalidIndexType = errors .NewKind ("expecting a pilosa index, instead got %T" )
50
54
)
51
55
56
const (
	// pilosaIndexThreadsKey is the name of the environment variable that
	// indexThreads looks up with os.LookupEnv.
	pilosaIndexThreadsKey = "PILOSA_INDEX_THREADS"

	// pilosaIndexThreadsVar is the name of the session variable that
	// indexThreads reads through ctx.Session.Get.
	pilosaIndexThreadsVar = "pilosa_index_threads"
)
60
+
52
61
type (
53
62
bitBatch struct {
54
63
size uint64
@@ -217,13 +226,13 @@ func (d *Driver) savePartition(
217
226
kviter sql.IndexKeyValueIter ,
218
227
idx * pilosaIndex ,
219
228
pilosaIndex * concurrentPilosaIndex ,
220
- offset uint64 ,
221
229
b * batch ,
222
230
) (uint64 , error ) {
223
231
var (
224
232
colID uint64
225
233
err error
226
234
)
235
+
227
236
for i , e := range idx .Expressions () {
228
237
name := fieldName (idx .ID (), e , p )
229
238
pilosaIndex .DeleteField (name )
@@ -254,7 +263,7 @@ func (d *Driver) savePartition(
254
263
kviter .Close ()
255
264
}()
256
265
257
- for colID = offset ; err == nil ; colID ++ {
266
+ for colID = 0 ; err == nil ; colID ++ {
258
267
// commit each batch of objects (pilosa and boltdb)
259
268
if colID % sql .IndexBatchSize == 0 && colID != 0 {
260
269
if err = d .saveBatch (ctx , idx .mapping , colID , b ); err != nil {
@@ -265,28 +274,30 @@ func (d *Driver) savePartition(
265
274
select {
266
275
case <- ctx .Context .Done ():
267
276
return 0 , ctx .Context .Err ()
268
-
269
277
default :
270
- var (
271
- values []interface {}
272
- location []byte
273
- )
274
- if values , location , err = kviter .Next (); err != nil {
275
- break
276
- }
278
+ }
277
279
278
- for i , field := range b . fields {
279
- if values [ i ] = = nil {
280
- continue
281
- }
280
+ values , location , err := kviter . Next ()
281
+ if err ! = nil {
282
+ break
283
+ }
282
284
283
- rowID , err := idx .mapping .getRowID (field .Name (), values [i ])
284
- if err != nil {
285
- return 0 , err
286
- }
287
- b .bitBatches [i ].Add (rowID , colID )
285
+ for i , field := range b .fields {
286
+ if values [i ] == nil {
287
+ continue
288
+ }
289
+
290
+ rowID , err := idx .mapping .getRowID (field .Name (), values [i ])
291
+ if err != nil {
292
+ return 0 , err
288
293
}
289
- err = idx .mapping .putLocation (pilosaIndex .Name (), colID , location )
294
+
295
+ b .bitBatches [i ].Add (rowID , colID )
296
+ }
297
+
298
+ err = idx .mapping .putLocation (pilosaIndex .Name (), p , colID , location )
299
+ if err != nil {
300
+ return 0 , err
290
301
}
291
302
}
292
303
@@ -307,7 +318,7 @@ func (d *Driver) savePartition(
307
318
}
308
319
}
309
320
310
- return colID - offset , err
321
+ return colID , err
311
322
}
312
323
313
324
// Save the given index (mapping and bitmap)
@@ -331,44 +342,86 @@ func (d *Driver) Save(
331
342
idx .wg .Add (1 )
332
343
defer idx .wg .Done ()
333
344
334
- var b = batch {
335
- fields : make ([]* pilosa.Field , len (idx .Expressions ())),
336
- bitBatches : make ([]* bitBatch , len (idx .Expressions ())),
337
- }
338
-
339
345
ctx .Context , idx .cancel = context .WithCancel (ctx .Context )
340
346
processingFile := d .processingFilePath (i .Database (), i .Table (), i .ID ())
341
- if err : = index .WriteProcessingFile (
347
+ err = index .WriteProcessingFile (
342
348
processingFile ,
343
349
[]byte {processingFileOnSave },
344
- ); err != nil {
350
+ )
351
+ if err != nil {
345
352
return err
346
353
}
347
354
348
355
defer iter .Close ()
349
356
pilosaIndex := idx .index
350
- var rows uint64
357
+
358
+ var (
359
+ rows , timePilosa , timeMapping uint64
360
+
361
+ wg sync.WaitGroup
362
+ tokens = make (chan struct {}, indexThreads (ctx ))
363
+
364
+ errors []error
365
+ errmut sync.Mutex
366
+ )
367
+
351
368
for {
369
+ select {
370
+ case <- ctx .Done ():
371
+ return
372
+ default :
373
+ }
374
+
352
375
p , kviter , err := iter .Next ()
353
376
if err != nil {
354
377
if err == io .EOF {
355
378
break
356
379
}
357
- return err
358
- }
359
380
360
- numRows , err := d . savePartition ( ctx , p , kviter , idx , pilosaIndex , rows , & b )
361
- if err != nil {
381
+ idx . cancel ( )
382
+ wg . Wait ()
362
383
return err
363
384
}
364
385
365
- rows += numRows
386
+ wg .Add (1 )
387
+
388
+ go func () {
389
+ defer func () {
390
+ wg .Done ()
391
+ <- tokens
392
+ }()
393
+
394
+ tokens <- struct {}{}
395
+
396
+ var b = & batch {
397
+ fields : make ([]* pilosa.Field , len (idx .Expressions ())),
398
+ bitBatches : make ([]* bitBatch , len (idx .Expressions ())),
399
+ }
400
+
401
+ numRows , err := d .savePartition (ctx , p , kviter , idx , pilosaIndex , b )
402
+ if err != nil {
403
+ errmut .Lock ()
404
+ errors = append (errors , err )
405
+ idx .cancel ()
406
+ errmut .Unlock ()
407
+ return
408
+ }
409
+
410
+ atomic .AddUint64 (& timeMapping , uint64 (b .timeMapping ))
411
+ atomic .AddUint64 (& timePilosa , uint64 (b .timePilosa ))
412
+ atomic .AddUint64 (& rows , numRows )
413
+ }()
414
+ }
415
+
416
+ wg .Wait ()
417
+ if len (errors ) > 0 {
418
+ return errors [0 ]
366
419
}
367
420
368
421
logrus .WithFields (logrus.Fields {
369
422
"duration" : time .Since (start ),
370
- "pilosa" : b . timePilosa ,
371
- "mapping" : b . timeMapping ,
423
+ "pilosa" : timePilosa ,
424
+ "mapping" : timeMapping ,
372
425
"rows" : rows ,
373
426
"id" : i .ID (),
374
427
}).Debugf ("finished pilosa indexing" )
@@ -421,18 +474,18 @@ func (d *Driver) Delete(i sql.Index, partitions sql.PartitionIter) error {
421
474
return partitions .Close ()
422
475
}
423
476
424
- func (d * Driver ) saveBatch (ctx * sql.Context , m * mapping , colID uint64 , b * batch ) error {
425
- err := d .savePilosa (ctx , colID , b )
477
+ func (d * Driver ) saveBatch (ctx * sql.Context , m * mapping , cols uint64 , b * batch ) error {
478
+ err := d .savePilosa (ctx , cols , b )
426
479
if err != nil {
427
480
return err
428
481
}
429
482
430
- return d .saveMapping (ctx , m , colID , true , b )
483
+ return d .saveMapping (ctx , m , cols , true , b )
431
484
}
432
485
433
- func (d * Driver ) savePilosa (ctx * sql.Context , colID uint64 , b * batch ) error {
486
+ func (d * Driver ) savePilosa (ctx * sql.Context , cols uint64 , b * batch ) error {
434
487
span , _ := ctx .Span ("pilosa.Save.bitBatch" ,
435
- opentracing.Tag {Key : "cols" , Value : colID },
488
+ opentracing.Tag {Key : "cols" , Value : cols },
436
489
opentracing.Tag {Key : "fields" , Value : len (b .fields )},
437
490
)
438
491
defer span .Finish ()
@@ -457,12 +510,12 @@ func (d *Driver) savePilosa(ctx *sql.Context, colID uint64, b *batch) error {
457
510
func (d * Driver ) saveMapping (
458
511
ctx * sql.Context ,
459
512
m * mapping ,
460
- colID uint64 ,
513
+ cols uint64 ,
461
514
cont bool ,
462
515
b * batch ,
463
516
) error {
464
517
span , _ := ctx .Span ("pilosa.Save.mapping" ,
465
- opentracing.Tag {Key : "cols" , Value : colID },
518
+ opentracing.Tag {Key : "cols" , Value : cols },
466
519
opentracing.Tag {Key : "continues" , Value : cont },
467
520
)
468
521
defer span .Finish ()
@@ -541,3 +594,21 @@ func (d *Driver) newPilosaIndex(db, table string) (*pilosa.Index, error) {
541
594
}
542
595
return idx , nil
543
596
}
597
+
598
+ func indexThreads (ctx * sql.Context ) int {
599
+ typ , val := ctx .Session .Get (pilosaIndexThreadsVar )
600
+ if val != nil && typ == sql .Int64 {
601
+ return int (val .(int64 ))
602
+ }
603
+
604
+ var value int
605
+ if v , ok := os .LookupEnv (pilosaIndexThreadsKey ); ok {
606
+ value , _ = strconv .Atoi (v )
607
+ }
608
+
609
+ if value <= 0 {
610
+ value = runtime .NumCPU ()
611
+ }
612
+
613
+ return value
614
+ }
0 commit comments