@@ -896,11 +896,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
896
896
"""
897
897
# Make a copy of the input columns so we can modify it
898
898
if columns is not None :
899
- columns = list (columns )
900
-
901
- if len (algos .unique (columns )) < len (columns ):
902
- raise ValueError ('Non-unique columns not yet supported in '
903
- 'from_records' )
899
+ columns = _ensure_index (columns )
904
900
905
901
if com .is_iterator (data ):
906
902
if nrows == 0 :
@@ -932,48 +928,66 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
932
928
else :
933
929
data = values
934
930
935
- if isinstance (data , (np .ndarray , DataFrame , dict )):
936
- keys , sdict = _rec_to_dict (data )
931
+ if isinstance (data , dict ):
937
932
if columns is None :
938
- columns = keys
933
+ columns = arr_columns = _ensure_index (sorted (data ))
934
+ arrays = [data [k ] for k in columns ]
939
935
else :
940
- sdict = dict ((k , v ) for k , v in sdict .iteritems ()
941
- if k in columns )
936
+ arrays = []
937
+ arr_columns = []
938
+ for k , v in data .iteritems ():
939
+ if k in columns :
940
+ arr_columns .append (k )
941
+ arrays .append (v )
942
+
943
+ elif isinstance (data , (np .ndarray , DataFrame )):
944
+ arrays , columns = _to_arrays (data , columns )
945
+ if columns is not None :
946
+ columns = _ensure_index (columns )
947
+ arr_columns = columns
942
948
else :
943
- arrays , columns = _to_arrays (data , columns ,
944
- coerce_float = coerce_float )
945
- columns = list (columns ) # _to_arrays returns index, but we might mutate
946
- sdict = dict (zip (columns , arrays ))
949
+ arrays , arr_columns = _to_arrays (data , columns ,
950
+ coerce_float = coerce_float )
951
+
952
+ arr_columns = _ensure_index (arr_columns )
953
+ if columns is not None :
954
+ columns = _ensure_index (columns )
955
+ else :
956
+ columns = arr_columns
947
957
948
958
if exclude is None :
949
959
exclude = set ()
950
960
else :
951
961
exclude = set (exclude )
952
962
953
- for col in exclude :
954
- del sdict [col ]
955
- columns .remove (col )
956
-
957
963
result_index = None
958
964
if index is not None :
959
965
if (isinstance (index , basestring ) or
960
966
not hasattr (index , "__iter__" )):
961
- result_index = sdict . pop (index )
962
- result_index = Index ( result_index , name = index )
963
- columns . remove ( index )
967
+ i = columns . get_loc (index )
968
+ exclude . add ( index )
969
+ result_index = Index ( arrays [ i ], name = index )
964
970
else :
965
971
try :
966
- arrays = []
967
- for field in index :
968
- arrays .append (sdict [field ])
969
- for field in index :
970
- del sdict [field ]
971
- columns .remove (field )
972
- result_index = MultiIndex .from_arrays (arrays , names = index )
972
+ to_remove = [arr_columns .get_loc (field ) for field in index ]
973
+
974
+ result_index = MultiIndex .from_arrays (
975
+ [arrays [i ] for i in to_remove ], names = index )
976
+
977
+ exclude .update (index )
973
978
except Exception :
974
979
result_index = index
975
980
976
- return cls (sdict , index = result_index , columns = columns )
981
+ if any (exclude ):
982
+ to_remove = [arr_columns .get_loc (col ) for col in exclude ]
983
+ arrays = [v for i , v in enumerate (arrays ) if i not in to_remove ]
984
+ arr_columns = arr_columns .drop (exclude )
985
+ columns = columns .drop (exclude )
986
+
987
+ mgr = _arrays_to_mgr (arrays , arr_columns , result_index ,
988
+ columns )
989
+
990
+ return DataFrame (mgr )
977
991
978
992
def to_records (self , index = True ):
979
993
"""
@@ -5217,9 +5231,18 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None):
5217
5231
"""
5218
5232
Return list of arrays, columns
5219
5233
"""
5234
+ if isinstance (data , DataFrame ):
5235
+ if columns is not None :
5236
+ arrays = [data .icol (i ).values for i , col in enumerate (data .columns )
5237
+ if col in columns ]
5238
+ else :
5239
+ columns = data .columns
5240
+ arrays = [data .icol (i ).values for i in range (len (columns ))]
5241
+
5242
+ return arrays , columns
5220
5243
5221
5244
if len (data ) == 0 :
5222
- return [], columns if columns is not None else []
5245
+ return [], [] # columns if columns is not None else []
5223
5246
if isinstance (data [0 ], (list , tuple )):
5224
5247
return _list_to_arrays (data , columns , coerce_float = coerce_float ,
5225
5248
dtype = dtype )
@@ -5231,6 +5254,10 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None):
5231
5254
return _list_of_series_to_arrays (data , columns ,
5232
5255
coerce_float = coerce_float ,
5233
5256
dtype = dtype )
5257
+ elif isinstance (data , np .ndarray ):
5258
+ columns = list (data .dtype .names )
5259
+ arrays = [data [k ] for k in columns ]
5260
+ return arrays , columns
5234
5261
else :
5235
5262
# last ditch effort
5236
5263
data = map (tuple , data )
0 commit comments