- 您没有给出任何数据或数据结构,因此这里先构造了一份示例结构,其中定义了:节点、有效的流,以及按日期记录的实际流
- 基于这份结构,按照 https://plotly.com/python/dropdowns/ 的做法构建了 year 和 month 两个过滤器下拉列表。请注意,这两个过滤器彼此独立(选择其中一个不会与另一个叠加),这是由 updatemenus 的静态结构决定的
- 在这个解决方案中,DataFrame 的列名与 Sankey link 的属性名基本一致(只有 date 列被映射到 label)。如果您的数据结构不是这种情况,可以把属性列表改为 dict(列名 → 属性名)而不是 list
节点
|   | 0  |
|---|----|
| 0 | A0 |
| 1 | A1 |
| 2 | B0 |
| 3 | B1 |
| 4 | C0 |
| 5 | C1 |
有效的流 `dfflow`

| source | target | source_name | target_name |
|--------|--------|-------------|-------------|
| 0 | 2 | A0 | B0 |
| 1 | 2 | A1 | B0 |
| 0 | 3 | A0 | B1 |
| 1 | 3 | A1 | B1 |
| 2 | 4 | B0 | C0 |
| 3 | 4 | B1 | C0 |
| 2 | 5 | B0 | C1 |
| 3 | 5 | B1 | C1 |
按日期的示例流 `df`(仅显示前几行)

| source | target | source_name | target_name | value | date |
|--------|--------|-------------|-------------|-------|------|
| 0 | 2 | A0 | B0 | 3.58321 | 2020-07-31 00:00:00 |
| 1 | 2 | A1 | B0 | 4.74713 | 2020-07-31 00:00:00 |
| 0 | 3 | A0 | B1 | 4.96593 | 2020-07-31 00:00:00 |
| 1 | 3 | A1 | B1 | 3.64883 | 2020-07-31 00:00:00 |
| 2 | 4 | B0 | C0 | 4.67168 | 2020-07-31 00:00:00 |
| 3 | 4 | B1 | C0 | 4.73339 | 2020-07-31 00:00:00 |
| 2 | 5 | B0 | C1 | 1.85678 | 2020-07-31 00:00:00 |
| 3 | 5 | B1 | C1 | 1.76691 | 2020-07-31 00:00:00 |
| 0 | 2 | A0 | B0 | 4.85048 | 2020-08-31 00:00:00 |
| 1 | 2 | A1 | B0 | 3.74573 | 2020-08-31 00:00:00 |
| 0 | 3 | A0 | B1 | 4.40529 | 2020-08-31 00:00:00 |
| 1 | 3 | A1 | B1 | 4.84975 | 2020-08-31 00:00:00 |
| 2 | 4 | B0 | C0 | 1.82983 | 2020-08-31 00:00:00 |
| 3 | 4 | B1 | C0 | 2.87512 | 2020-08-31 00:00:00 |
| 2 | 5 | B0 | C1 | 4.59346 | 2020-08-31 00:00:00 |
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import itertools
# Node labels "A0", "A1", "B0", "B1", "C0", "C1". The Series' integer index
# (0..5) is the node id that the flow source/target columns refer to.
nodes = (
    pd.DataFrame(itertools.product(list("ABC"), range(2)))
    .astype(str)
    .apply("".join, axis=1)
)

# create dataframe of flows between nodes, A nodes to B nodes, B nodes to C nodes
dfflow = pd.concat(
    [
        pd.DataFrame(
            itertools.product(
                # ids of every node whose label starts with the first letter ...
                nodes.loc[nodes.str[0] == pair[0]].index.tolist(),
                # ... crossed with ids of every node starting with the second
                nodes.loc[nodes.str[0] == pair[1]].index.tolist(),
            ),
            columns=["source", "target"],
        )
        for pair in ["AB", "BC"]
    ]
)

# for purpose of human readability, put node names on flows
dfflow = dfflow.merge(
    nodes.rename("source_name"), left_on="source", right_index=True
).merge(nodes.rename("target_name"), left_on="target", right_index=True)

# create some values against flows for a range of dates
# (14 consecutive month-ends starting with July 2020). An explicit MonthEnd
# offset is used instead of the "M" string alias, which newer pandas versions
# deprecate, so the same dates are produced regardless of pandas version.
# ignore_index gives every row a unique label; without it the per-date copies
# of dfflow would repeat the same index values.
df = pd.concat(
    [
        dfflow.assign(value=np.random.uniform(1, 5, len(dfflow)), date=d)
        for d in pd.date_range(
            "1-jul-2020", freq=pd.offsets.MonthEnd(), periods=14
        )
    ],
    ignore_index=True,
)
# utility function to build a dropdown menu
def menu(df, filter=pd.Series([0]), y=1):
    """Build one ``updatemenus`` entry that filters the Sankey links.

    One button is created per distinct value of ``filter`` (in order of first
    appearance), followed by an "All" button that restores every row.  Each
    button issues a ``restyle`` call replacing the trace's ``link`` attributes
    with the matching rows of ``df``.

    Args:
        df: DataFrame with ``source``, ``target``, ``value`` and ``date``
            columns; ``date`` is sent as the link ``label``.
        filter: Series compared against each of its own unique values to
            produce the row mask applied to ``df``.
        y: value stored under the menu's ``"y"`` key.

    Returns:
        dict with keys ``"y"`` and ``"buttons"``.
    """
    link_columns = ["source", "target", "value", "date"]

    def link_update(rows):
        # Column names double as link attribute names, except that the
        # ``date`` column is shown as the link ``label``.
        return {
            ("label" if column == "date" else column): rows[column].values
            for column in link_columns
        }

    def button(text, rows):
        return {
            "label": text,
            "method": "restyle",
            "args": ["link", link_update(rows)],
        }

    buttons = [
        button(str(value), df.loc[filter == value]) for value in filter.unique()
    ]
    # The "All" entry is built separately rather than through a magic -99
    # value mixed into the filter values: a real filter value of -99 is then
    # handled like any other, and the filter values never need to be
    # concatenated with an integer.
    buttons.append(button("All", df))
    return {"y": y, "buttons": buttons}
# build the sankey diagram and the required filter drop downs
# The trace starts out showing every row of df; the date column is used as
# the link label.
sankey_trace = go.Sankey(
    node=dict(label=nodes.values),
    link=dict(
        source=df["source"],
        target=df["target"],
        value=df["value"],
        label=df["date"],
    ),
)
# Two separate dropdowns: one keyed on the year of each row's date and, placed
# at y=0.9, one keyed on the month.
year_menu = menu(df, filter=df["date"].dt.year)
month_menu = menu(df, filter=df["date"].dt.month, y=0.9)
# Left as the final bare expression, exactly like the original.
go.Figure(sankey_trace).update_layout(
    margin=dict(l=0, r=0, t=0, b=0),
    updatemenus=[year_menu, month_menu],
)