且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

如何有效地将一个Pandas数据框的每一列与另一数据框的每一列相乘?

更新时间:2022-12-12 08:43:54

# use numpy to get a pair of indices that map out every
# combination of columns from df_1 and columns of df_2
pidx = np.indices((df_1.shape[1], df_2.shape[1])).reshape(2, -1)

# use pandas MultiIndex to create a nice MultiIndex for
# the final output
lcol = pd.MultiIndex.from_product([df_1.columns, df_2.columns],
                                  names=[df_1.columns.name, df_2.columns.name])

# df_1.values[:, pidx[0]] slices df_1 values for every combination
# like wise with df_2.values[:, pidx[1]]
# finally, I marry up the product of arrays with the MultiIndex
pd.DataFrame(df_1.values[:, pidx[0]] * df_2.values[:, pidx[1]],
             columns=lcol)

代码

from string import ascii_letters

df_1 = pd.DataFrame(np.random.randint(0, 2, (1000, 26)), columns=list(ascii_letters[:26]))
df_2 = pd.DataFrame(np.random.randint(0, 2, (1000, 52)), columns=list(ascii_letters))

def pir1(df_1, df_2):
    pidx = np.indices((df_1.shape[1], df_2.shape[1])).reshape(2, -1)

    lcol = pd.MultiIndex.from_product([df_1.columns, df_2.columns],
                                      names=[df_1.columns.name, df_2.columns.name])

    return pd.DataFrame(df_1.values[:, pidx[0]] * df_2.values[:, pidx[1]],
                        columns=lcol)

def Test2(DA,DB):
  MA = DA.as_matrix()
  MB = DB.as_matrix()
  MM = np.zeros((len(MA),len(MA[0])*len(MB[0])))
  Col = []
  for i in range(len(MB[0])):
    for j in range(len(MA[0])):
      MM[:,i*len(MA[0])+j] = MA[:,j]*MB[:,i]
      Col.append('1col_'+str(i+1)+'_2col_'+str(j+1))
  return pd.DataFrame(MM,dtype=int,columns=Col)

结果