Cyrine Ezzahra Cyrine Ezzahra - 3 months ago 20
Python Question

Merge two dataframes with pandas

I have two dataframes :

df_energy.info()



<class 'pandas.core.frame.DataFrame'>
Int64Index: 34673 entries, 1 to 43228
Data columns (total 6 columns):
TIMESTAMP 34673 non-null datetime64[ns]
P_ACT_KW 34673 non-null float64
PERIODE_TARIF 34673 non-null object
P_SOUSCR 34673 non-null float64
SITE 34673 non-null object
TARIF 34673 non-null object
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 1.9+ MB



and df1 :

df1.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38840 entries, 0 to 38839
Data columns (total 7 columns):
TIMESTAMP 38840 non-null datetime64[ns]
ACT_TIME_AERATEUR_1_F1 38696 non-null float64
ACT_TIME_AERATEUR_1_F3 38697 non-null float64
ACT_TIME_AERATEUR_1_F5 38695 non-null float64
ACT_TIME_AERATEUR_1_F6 38695 non-null float64
ACT_TIME_AERATEUR_1_F7 38693 non-null float64
ACT_TIME_AERATEUR_1_F8 38696 non-null float64
dtypes: datetime64[ns](1), float64(6)
memory usage: 2.1 MB



I try to merge these two dataframes based on TIMESTAMP column :

merged_df_energy = pd.merge(df_energy.set_index('TIMESTAMP'),
df1,
right_index=True,
left_index =True)


But I get this error :


TypeError Traceback (most recent call last)
<ipython-input-190-34cd0916eb6a> in <module>()
2 df1,
3 right_index=True,
----> 4 left_index =True)
5 merged_df_energy.info()

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in merge(left, right, how, on, left_on, right_on, left_index,
right_index, sort, suffixes, copy, indicator)
37 right_index=right_index, sort=sort, suffixes=suffixes,
38 copy=copy, indicator=indicator)
---> 39 return op.get_result()
40 if debug:
41 merge.doc = _merge_doc % '\nleft : DataFrame'

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in get_result(self)
215 self.left, self.right)
216
--> 217 join_index, left_indexer, right_indexer = self._get_join_info()
218
219 ldata, rdata = self.left._data, self.right._data

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in _get_join_info(self)
337 if self.left_index and self.right_index:
338 join_index, left_indexer, right_indexer = \
--> 339 left_ax.join(right_ax, how=self.how, return_indexers=True)
340 elif self.right_index and self.how == 'left':
341 join_index, left_indexer, right_indexer = \

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tseries\index.py


in join(self, other, how, level, return_indexers)
1072 this, other = self._maybe_utc_convert(other)
1073 return Index.join(this, other, how=how, level=level,
-> 1074 return_indexers=return_indexers)
1075
1076 def _maybe_utc_convert(self, other):

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\indexes\base.py


in join(self, other, how, level, return_indexers)
2480 this = self.astype('O')
2481 other = other.astype('O')
-> 2482 return this.join(other, how=how, return_indexers=return_indexers)
2483
2484 _validate_join_method(how)

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\indexes\base.py


in join(self, other, how, level, return_indexers)
2493 else:
2494 return self._join_non_unique(other, how=how,
-> 2495 return_indexers=return_indexers)
2496 elif self.is_monotonic and other.is_monotonic:
2497 try:

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\indexes\base.py


in _join_non_unique(self, other, how, return_indexers)
2571 left_idx, right_idx = _get_join_indexers([self.values],
2572 [other._values], how=how,
-> 2573 sort=True)
2574
2575 left_idx = com._ensure_platform_int(left_idx)

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in _get_join_indexers(left_keys, right_keys, sort, how)
544
545 # get left & right join labels and num. of levels at each location
--> 546 llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
547
548 # get flat i8 keys from label lists

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in _factorize_keys(lk, rk, sort)
718 if sort:
719 uniques = rizer.uniques.to_array()
--> 720 llab, rlab = _sort_labels(uniques, llab, rlab)
721
722 # NA group

C:\Users\Demonstrator\Anaconda3\lib\site-packages\pandas\tools\merge.py


in _sort_labels(uniques, left, right)
741 uniques = Index(uniques).values
742
--> 743 sorter = uniques.argsort()
744
745 reverse_indexer = np.empty(len(sorter), dtype=np.int64)

pandas\tslib.pyx in pandas.tslib._Timestamp.__richcmp__ (pandas\tslib.c:18619)()

TypeError: Cannot compare type 'Timestamp' with type 'int'



Can you help me please to resolve this problem?

Thank you

Answer

Try this:

import pandas

result = pandas.merge(df_energy, df1, on='TIMESTAMP')

If you want to save it:

result.to_csv(path_or_buf='result.csv', sep=',')

Or check the columns:

result_fields = result.columns.tolist()
print (result_fields)