@@ -305,11 +305,18 @@ def encode(self, variable: Variable, name: T_Name = None):
305
305
dims , data , attrs , encoding = unpack_for_encoding (variable )
306
306
307
307
dtype = np .dtype (encoding .get ("dtype" , data .dtype ))
308
+ # from netCDF best practices
309
+ # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data
310
+ # "_Unsigned = "true" to indicate that
311
+ # integer data should be treated as unsigned"
312
+ is_unsigned = encoding .get ("_Unsigned" , "false" ) == "true"
313
+ # only used for _Unsigned cases
314
+ signed_dtype = np .dtype (
315
+ encoding .get ("dtype" , f"i{ dtype .itemsize } " if is_unsigned else dtype )
316
+ )
308
317
fv = encoding .get ("_FillValue" )
309
318
mv = encoding .get ("missing_value" )
310
- # to properly handle _FillValue/missing_value below [a], [b]
311
- # we need to check if unsigned data is written as signed data
312
- unsigned = encoding .get ("_Unsigned" ) is not None
319
+ fill_value = None
313
320
314
321
fv_exists = fv is not None
315
322
mv_exists = mv is not None
@@ -324,23 +331,28 @@ def encode(self, variable: Variable, name: T_Name = None):
324
331
325
332
if fv_exists :
326
333
# Ensure _FillValue is cast to same dtype as data's
327
- # [a] need to skip this if _Unsigned is available
328
- if not unsigned :
329
- encoding ["_FillValue" ] = dtype .type (fv )
334
+ encoding ["_FillValue" ] = (
335
+ self ._encode_unsigned_fill_value (name , fv , signed_dtype )
336
+ if is_unsigned
337
+ else dtype .type (fv )
338
+ )
330
339
fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
331
340
332
341
if mv_exists :
333
342
# try to use _FillValue, if it exists to align both values
334
343
# or use missing_value and ensure it's cast to same dtype as data's
335
- # [b] need to provide mv verbatim if _Unsigned is available
336
344
encoding ["missing_value" ] = attrs .get (
337
345
"_FillValue" ,
338
- (dtype .type (mv ) if not unsigned else mv ),
346
+ (
347
+ self ._encode_unsigned_fill_value (name , fv , signed_dtype )
348
+ if is_unsigned
349
+ else dtype .type (mv )
350
+ ),
339
351
)
340
352
fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
341
353
342
354
# apply fillna
343
- if not pd .isnull (fill_value ):
355
+ if fill_value is not None and not pd .isnull (fill_value ):
344
356
# special case DateTime to properly handle NaT
345
357
if _is_time_like (attrs .get ("units" )) and data .dtype .kind in "iu" :
346
358
data = duck_array_ops .where (
@@ -349,46 +361,112 @@ def encode(self, variable: Variable, name: T_Name = None):
349
361
else :
350
362
data = duck_array_ops .fillna (data , fill_value )
351
363
364
+ if fill_value is not None and is_unsigned :
365
+ pop_to (encoding , attrs , "_Unsigned" )
366
+ # XXX: Is this actually needed? Doesn't the backend handle this?
367
+ data = duck_array_ops .astype (duck_array_ops .around (data ), signed_dtype )
368
+ attrs ["_FillValue" ] = fill_value
369
+
352
370
return Variable (dims , data , attrs , encoding , fastpath = True )
353
371
372
+ def _encode_unsigned_fill_value (
373
+ self , name : T_Name , fill_value : Any , signed_dtype : np .typing .DTypeLike
374
+ ) -> Any :
375
+ try :
376
+ # user provided the on-disk signed fill
377
+ if hasattr (fill_value , "item" ):
378
+ # if numpy type, convert to python native integer to determine overflow
379
+ # otherwise numpy unsigned ints will silently cast to the signed counterpart
380
+ fill_value = fill_value .item ()
381
+ new_fill = signed_dtype .type (fill_value )
382
+ except OverflowError :
383
+ warnings .warn (
384
+ f"variable { name !r} will be stored as signed integers "
385
+ f"but _FillValue attribute can't be represented as a "
386
+ f"signed integer." ,
387
+ SerializationWarning ,
388
+ stacklevel = 3 ,
389
+ )
390
+ # user provided the in-memory unsigned fill, convert to signed type
391
+ unsigned_dtype = np .dtype (f"u{ signed_dtype .itemsize } " )
392
+ # use view here to prevent OverflowError
393
+ new_fill = (
394
+ np .array (fill_value , dtype = unsigned_dtype ).view (signed_dtype ).item ()
395
+ )
396
+ return new_fill
397
+
354
398
def decode(self, variable: Variable, name: T_Name = None):
    """Decode ``_FillValue``/``missing_value`` and ``_Unsigned`` attributes.

    Parameters
    ----------
    variable
        Variable as read from the backend.
    name
        Variable name, forwarded to helpers and used in warning messages.

    Returns
    -------
    ``variable`` itself when it carries neither an ``_Unsigned`` attribute
    nor any fill attributes; otherwise a new ``Variable`` whose data is
    lazily wrapped with the dtype conversion and/or masking transform and
    whose fill/``_Unsigned`` attributes have been moved into ``encoding``.
    """
    raw_fill_dict, encoded_fill_values = _check_fill_values(
        variable.attrs, name, variable.dtype
    )
    if "_Unsigned" not in variable.attrs and not raw_fill_dict:
        # Nothing for this coder to do.
        return variable

    dims, data, attrs, encoding = unpack_for_decoding(variable)

    # Even if _Unsigned is used, retain the on-disk fill attributes in encoding.
    for attr, value in raw_fill_dict.items():
        safe_setitem(encoding, attr, value, name=name)

    if "_Unsigned" in attrs:
        unsigned = pop_to(attrs, encoding, "_Unsigned")

        if data.dtype.kind == "i":
            if unsigned == "true":
                # Signed on disk, but flagged to be treated as unsigned.
                unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}")
                transform = partial(np.asarray, dtype=unsigned_dtype)
                if "_FillValue" in raw_fill_dict:
                    new_fill = np.array(
                        raw_fill_dict["_FillValue"], dtype=data.dtype
                    )
                    # Swap the signed fill for its unsigned bit pattern so it
                    # is compared against the converted data.
                    encoded_fill_values.remove(raw_fill_dict["_FillValue"])
                    # use view here to prevent OverflowError
                    encoded_fill_values.add(new_fill.view(unsigned_dtype).item())
                data = lazy_elemwise_func(data, transform, unsigned_dtype)
        elif data.dtype.kind == "u":
            if unsigned == "false":
                # Unsigned on disk, but flagged to be treated as signed.
                signed_dtype = np.dtype(f"i{data.dtype.itemsize}")
                transform = partial(np.asarray, dtype=signed_dtype)
                data = lazy_elemwise_func(data, transform, signed_dtype)
                if "_FillValue" in raw_fill_dict:
                    new_fill = signed_dtype.type(raw_fill_dict["_FillValue"])
                    encoded_fill_values.remove(raw_fill_dict["_FillValue"])
                    encoded_fill_values.add(new_fill)
        else:
            warnings.warn(
                f"variable {name!r} has _Unsigned attribute but is not "
                "of integer type. Ignoring attribute.",
                SerializationWarning,
                stacklevel=3,
            )

    if encoded_fill_values:
        dtype: np.typing.DTypeLike
        decoded_fill_value: Any
        if _is_time_like(attrs.get("units")) and data.dtype.kind in "iu":
            # special case DateTime to properly handle NaT
            dtype, decoded_fill_value = np.int64, np.iinfo(np.int64).min
        elif "scale_factor" not in attrs and "add_offset" not in attrs:
            dtype, decoded_fill_value = dtypes.maybe_promote(data.dtype)
        else:
            # Packed data: the decoded dtype is a float chosen from the
            # scale/offset attributes, with NaN as the decoded fill.
            dtype, decoded_fill_value = (
                _choose_float_dtype(data.dtype, attrs),
                np.nan,
            )

        # NOTE(review): _apply_mask presumably replaces values found in
        # encoded_fill_values with decoded_fill_value — confirm in its definition.
        transform = partial(
            _apply_mask,
            encoded_fill_values=encoded_fill_values,
            decoded_fill_value=decoded_fill_value,
            dtype=dtype,
        )
        data = lazy_elemwise_func(data, transform, dtype)

    return Variable(dims, data, attrs, encoding, fastpath=True)
392
470
393
471
394
472
def _scale_offset_decoding (data , scale_factor , add_offset , dtype : np .typing .DTypeLike ):
@@ -506,74 +584,6 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
506
584
return variable
507
585
508
586
509
- class UnsignedIntegerCoder (VariableCoder ):
510
- def encode (self , variable : Variable , name : T_Name = None ) -> Variable :
511
- # from netCDF best practices
512
- # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data
513
- # "_Unsigned = "true" to indicate that
514
- # integer data should be treated as unsigned"
515
- if variable .encoding .get ("_Unsigned" , "false" ) == "true" :
516
- dims , data , attrs , encoding = unpack_for_encoding (variable )
517
-
518
- pop_to (encoding , attrs , "_Unsigned" )
519
- # we need the on-disk type here
520
- # trying to get it from encoding, resort to an int with the same precision as data.dtype if not available
521
- signed_dtype = np .dtype (encoding .get ("dtype" , f"i{ data .dtype .itemsize } " ))
522
- if "_FillValue" in attrs :
523
- try :
524
- # user provided the on-disk signed fill
525
- new_fill = signed_dtype .type (attrs ["_FillValue" ])
526
- except OverflowError :
527
- # user provided the in-memory unsigned fill, convert to signed type
528
- unsigned_dtype = np .dtype (f"u{ signed_dtype .itemsize } " )
529
- # use view here to prevent OverflowError
530
- new_fill = (
531
- np .array (attrs ["_FillValue" ], dtype = unsigned_dtype )
532
- .view (signed_dtype )
533
- .item ()
534
- )
535
- attrs ["_FillValue" ] = new_fill
536
- data = duck_array_ops .astype (duck_array_ops .around (data ), signed_dtype )
537
-
538
- return Variable (dims , data , attrs , encoding , fastpath = True )
539
- else :
540
- return variable
541
-
542
- def decode (self , variable : Variable , name : T_Name = None ) -> Variable :
543
- if "_Unsigned" in variable .attrs :
544
- dims , data , attrs , encoding = unpack_for_decoding (variable )
545
- unsigned = pop_to (attrs , encoding , "_Unsigned" )
546
-
547
- if data .dtype .kind == "i" :
548
- if unsigned == "true" :
549
- unsigned_dtype = np .dtype (f"u{ data .dtype .itemsize } " )
550
- transform = partial (np .asarray , dtype = unsigned_dtype )
551
- if "_FillValue" in attrs :
552
- new_fill = np .array (attrs ["_FillValue" ], dtype = data .dtype )
553
- # use view here to prevent OverflowError
554
- attrs ["_FillValue" ] = new_fill .view (unsigned_dtype ).item ()
555
- data = lazy_elemwise_func (data , transform , unsigned_dtype )
556
- elif data .dtype .kind == "u" :
557
- if unsigned == "false" :
558
- signed_dtype = np .dtype (f"i{ data .dtype .itemsize } " )
559
- transform = partial (np .asarray , dtype = signed_dtype )
560
- data = lazy_elemwise_func (data , transform , signed_dtype )
561
- if "_FillValue" in attrs :
562
- new_fill = signed_dtype .type (attrs ["_FillValue" ])
563
- attrs ["_FillValue" ] = new_fill
564
- else :
565
- warnings .warn (
566
- f"variable { name !r} has _Unsigned attribute but is not "
567
- "of integer type. Ignoring attribute." ,
568
- SerializationWarning ,
569
- stacklevel = 3 ,
570
- )
571
-
572
- return Variable (dims , data , attrs , encoding , fastpath = True )
573
- else :
574
- return variable
575
-
576
-
577
587
class DefaultFillvalueCoder (VariableCoder ):
578
588
"""Encode default _FillValue if needed."""
579
589
0 commit comments