更新时间:2022-12-09 18:38:50
使用:
#convert column to int
df['person_count'] = df['person_count'].astype(int)
#reshape for one column from city and person_symbol columns
df1=df.set_index(['sport_name','person_name','person_count']).stack().reset_index(name='val')
print (df1)
sport_name person_name person_count level_3 val
0 football ramesh 10 city mumbai
1 football ramesh 10 person_symbol ram
2 football ramesh 14 city mumbai
3 football ramesh 14 person_symbol mum
4 football ramesh 25 city delhi
5 football ramesh 25 person_symbol mum
6 football ramesh 20 city delhi
7 football ramesh 20 person_symbol ram
8 football mohit 11 city pune
9 football mohit 11 person_symbol moh
10 cricket mahesh 34 city surat
11 cricket mahesh 34 person_symbol mah
12 cricket mahesh 23 city surat
13 cricket mahesh 23 person_symbol sur
14 cricket mahesh 43 city panji
15 cricket mahesh 43 person_symbol sur
16 cricket mahesh 34 city panji
17 cricket mahesh 34 person_symbol mah
#concatenate columns
a = df1['sport_name'] + '.' + df1['person_name'] + '.TOTAL.' + df1['val'] + '_count'
#groupby by Series a and aggregate sum
df2 = df1['person_count'].groupby(a.rename('derived_symbol'), sort=False)
.sum()
.reset_index(name='person_count')
print (df2)
derived_symbol person_count
0 football.ramesh.TOTAL.mumbai_count 24
1 football.ramesh.TOTAL.ram_count 30
2 football.ramesh.TOTAL.mum_count 39
3 football.ramesh.TOTAL.delhi_count 45
4 football.mohit.TOTAL.pune_count 11
5 football.mohit.TOTAL.moh_count 11
6 cricket.mahesh.TOTAL.surat_count 57
7 cricket.mahesh.TOTAL.mah_count 68
8 cricket.mahesh.TOTAL.sur_count 66
9 cricket.mahesh.TOTAL.panji_count 77