更新时间:2023-01-30 13:18:08
好的,花了点时间解决了,但是现在我很确定这是可行的.也比递归方法快得多.
OK, this took a while to nut out, but now I'm pretty sure it works. It is also much faster than the recursive method.
def _sort_tree_df(self, df, tree_column, sort_column, delimeter=':'):
    """Sort a flattened tree so every node is followed by its own subtree.

    Each row is a tree node whose path is stored in ``tree_column`` as
    ``delimeter``-joined parts (e.g. ``"A:x:p"``); surrounding whitespace
    of each part is ignored when matching a node to its ancestors.
    Siblings are ordered by descending ``abs()`` of their ``sort_column``
    value, and a node's descendants travel with it.

    Args:
        df: Input frame. It is not modified.
        tree_column: Name of the column holding the path strings.
        sort_column: Name of the numeric column to order siblings by.
        delimeter: Separator between path parts. It is handed to
            ``Series.str.split`` unchanged, so pandas' own literal/regex
            rules for the pattern apply.

    Returns:
        A new DataFrame with the same columns as ``df``, rows reordered,
        and a fresh ``0..n-1`` index.
    """
    if df.empty:
        return df.reset_index(drop=True)

    # One column per tree level (0 = root). Rows that are shallower than
    # the deepest path are padded with missing values on the right.
    parts = df[tree_column].str.split(delimeter, expand=True)
    parts = parts.apply(lambda col: col.str.strip())
    n_levels = parts.shape[1]

    # Depth of each row, derived from the split itself so it always agrees
    # with ``delimeter`` (the previous version counted a hard-coded ':').
    level = (parts.notna().sum(axis=1) - 1).to_numpy()

    own_values = df[sort_column]
    # Scratch frame: keeps the helper columns out of ``df`` so they can
    # neither clobber user columns nor leak into the returned frame.
    work = parts.copy()
    value_key = '_value'  # a str cannot collide with the int level columns

    # np.lexsort treats the LAST key as the primary one, so the cheapest
    # tie-breakers go first and the root-level key is appended last.
    sort_keys = [df[tree_column].to_numpy(), own_values.abs().to_numpy()]
    for depth in range(n_levels, 0, -1):
        ancestor_cols = list(parts.columns[:depth])
        # Keep only the values of nodes that live exactly at this depth;
        # the group "sum" then broadcasts each such node's value to all
        # of its descendants.
        work[value_key] = own_values.where(level == depth - 1)
        totals = work.groupby(ancestor_cols)[value_key].transform('sum').abs()
        # Rows shallower than this depth have no key here. Force NaN
        # explicitly instead of relying on how groupby treats null keys.
        totals = totals.where(level >= depth - 1)
        sort_keys.append(totals.to_numpy())

    # Ascending lexsort puts NaN last; reversing yields descending values
    # with NaN first, i.e. each parent ahead of its children.
    order = np.lexsort(sort_keys)[::-1]
    return df.iloc[order].reset_index(drop=True)