不幸的是,rolling 只能处理数值数据,所以需要先用 map 把标签编码成数字,计算完成后再用 map 解码回标签;但对于较大的 DataFrame,这种做法会很慢:
def f(x):
    """Classify one rolling window of encoded directions.

    Returns 2 when every element of ``x`` equals 1, 3 when every element
    equals 0, and NaN for any other (mixed) window.
    """
    # (encoded value, flag returned when the whole window holds that value)
    for code, flag in ((1, 2), (0, 3)):
        if np.all(x == code):
            return flag
    return np.nan
# Encode the labels as numbers (rolling handles numeric data only), flag every
# window of 6 identical values with f, then decode the flags back to labels.
# The parentheses are required: without them the lines starting with "." are
# a syntax error.
df['Output'] = (
    df['Direction'].map({'UP': 1, 'DOWN': 0})
    .rolling(6)
    # raw=True passes each window to f as a NumPy array instead of a Series;
    # f only does element-wise comparisons, so the result is the same.
    .apply(f, raw=True)
    .map({2: 'STRONG_UP', 3: 'STRONG_DOWN'})
)
print(df)
Index Direction Output
0 10887 UP NaN
1 10888 UP NaN
2 10889 UP NaN
3 10890 UP NaN
4 10891 UP NaN
5 10892 UP STRONG_UP
6 10893 UP STRONG_UP
7 10894 UP STRONG_UP
8 10895 UP STRONG_UP
9 10896 UP STRONG_UP
10 10897 UP STRONG_UP
11 10898 UP STRONG_UP
12 10899 UP STRONG_UP
13 10900 DOWN NaN
14 10901 DOWN NaN
15 10902 DOWN NaN
16 10903 UP NaN
17 10904 UP NaN
18 10905 DOWN NaN
19 10906 DOWN NaN
如果性能很重要,另一个思路是使用 strides 和 numpy.select:
def rolling_window(a, window):
    """Return all length-``window`` sliding windows over the last axis of ``a``.

    The result has shape ``a.shape[:-1] + (a.shape[-1] - window + 1, window)``.
    It is built with ``as_strided``, so it is a view onto ``a``'s memory rather
    than a copy; treat it as read-only.
    """
    n_windows = a.shape[-1] - window + 1
    # Reusing the last-axis stride for the new axis makes each successive
    # window start one element further along.
    return np.lib.stride_tricks.as_strided(
        a,
        shape=(*a.shape[:-1], n_windows, window),
        strides=(*a.strides, a.strides[-1]),
    )
n = 6
# Left-pad with n - 1 placeholders so that every row of df gets its own
# window (the first n - 1 windows are incomplete and contain None).
x = np.concatenate(([None] * (n - 1), df['Direction'].to_numpy()))
a = rolling_window(x, window=n)
print(a)
[[None None None None None 'UP']
[None None None None 'UP' 'UP']
[None None None 'UP' 'UP' 'UP']
[None None 'UP' 'UP' 'UP' 'UP']
[None 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'UP']
['UP' 'UP' 'UP' 'UP' 'UP' 'DOWN']
['UP' 'UP' 'UP' 'UP' 'DOWN' 'DOWN']
['UP' 'UP' 'UP' 'DOWN' 'DOWN' 'DOWN']
['UP' 'UP' 'DOWN' 'DOWN' 'DOWN' 'UP']
['UP' 'DOWN' 'DOWN' 'DOWN' 'UP' 'UP']
['DOWN' 'DOWN' 'DOWN' 'UP' 'UP' 'DOWN']
['DOWN' 'DOWN' 'UP' 'UP' 'DOWN' 'DOWN']]
# One boolean per row of `a`: True when the whole window holds that label.
m1, m2 = (np.all(a == label, axis=1) for label in ('UP', 'DOWN'))
# Label the all-UP and all-DOWN windows; every other row gets None.
df['Output'] = np.select([m1, m2], ['STRONG_UP', 'STRONG_DOWN'], None)
print(df)
Index Direction Output
0 10887 UP None
1 10888 UP None
2 10889 UP None
3 10890 UP None
4 10891 UP None
5 10892 UP STRONG_UP
6 10893 UP STRONG_UP
7 10894 UP STRONG_UP
8 10895 UP STRONG_UP
9 10896 UP STRONG_UP
10 10897 UP STRONG_UP
11 10898 UP STRONG_UP
12 10899 UP STRONG_UP
13 10900 DOWN None
14 10901 DOWN None
15 10902 DOWN None
16 10903 UP None
17 10904 UP None
18 10905 DOWN None
19 10906 DOWN None
性能:第一种(First)方法因为太慢,没有包含在下面的测试中。
# show_versions() prints the report itself and returns None, so wrapping it in
# print() would only add a stray "None" line after the report.
pd.show_versions()
INSTALLED VERSIONS
------------------
commit : f2ca0a2665b2d169c97de87b8e778dbed86aea07
python : 3.8.5.final.0
python-bits : 64
OS : Windows
OS-release : 7
Version : 6.1.7601
machine : AMD64
processor : Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : en
LOCALE : Slovak_Slovakia.1250
pandas : 1.1.1
numpy : 1.19.1
import perfplot
# Seed NumPy's global RNG so the random data drawn by make_df is reproducible.
np.random.seed(123)
def GW(df, n=6):
    """Label rows that extend a run of at least ``n`` equal directions.

    Consecutive equal values of ``df['Direction']`` form a run. From the
    ``n``-th row of a run onwards the result is ``'STRONG_' + direction``;
    all other rows get NaN.

    Parameters
    ----------
    df : DataFrame with ``'Index'`` and ``'Direction'`` columns.
    n : minimum run length that counts as "strong" (default 6).

    Returns
    -------
    A new DataFrame with columns ``'Index'``, ``'Direction'``, ``'result'``.
    The input frame is left unmodified, and an empty frame yields an empty
    result instead of raising.
    """
    direction = df['Direction']
    values = direction.to_numpy()

    # True on every row whose direction differs from the previous row; the
    # first row always starts a run. Built this way it also works for 0 rows.
    starts_run = np.ones(len(values), dtype=bool)
    starts_run[1:] = values[1:] != values[:-1]

    # The cumulative sum gives each run its own id; cumcount numbers the rows
    # inside a run from 0, hence the + 1 to get the running length.
    run_id = starts_run.cumsum()
    run_length = direction.groupby(run_id).cumcount() + 1

    result = np.where(run_length >= n, 'STRONG_' + direction, np.nan)
    return df[['Index', 'Direction']].assign(result=result)
def ST(df):
    """Label rows whose trailing 6-row window holds a single direction.

    Adds an ``'Output2'`` column to ``df`` in place and returns the same
    frame: ``'STRONG_UP'`` / ``'STRONG_DOWN'`` where the current row and the
    five rows before it are all ``'UP'`` / ``'DOWN'``, otherwise None.
    """
    def rolling_window(a, window):
        # Sliding windows over the last axis as a strided view (no copy).
        n_windows = a.shape[-1] - window + 1
        return np.lib.stride_tricks.as_strided(
            a,
            shape=(*a.shape[:-1], n_windows, window),
            strides=(*a.strides, a.strides[-1]),
        )

    n = 6
    # Left-pad with n - 1 placeholders so there is one window per row of df.
    padded = np.concatenate(([None] * (n - 1), df['Direction'].to_numpy()))
    windows = rolling_window(padded, n)
    all_up, all_down = (
        np.all(windows == label, axis=1) for label in ('UP', 'DOWN')
    )
    df['Output2'] = np.select(
        [all_up, all_down], ['STRONG_UP', 'STRONG_DOWN'], None
    )
    return df
def make_df(n):
    """Build a benchmark frame of ``n`` random directions.

    Returns a DataFrame with an ``'Index'`` column counting up from 0 and a
    ``'Direction'`` column drawn from ``'UP'`` / ``'DOWN'`` using NumPy's
    global random state.
    """
    labels = np.random.choice(['UP', 'DOWN'], n)
    columns = {
        'Index': np.arange(len(labels)),
        'Direction': labels,
    }
    return pd.DataFrame(columns)
# Time both kernels on random frames produced by make_df and plot the result
# on log-log axes.
perfplot.show(
    setup=make_df,
    kernels=[GW, ST],
    n_range=[1 << exp for exp in range(5, 25)],  # 2**5 .. 2**24 rows
    xlabel='len(df)',
    logx=True,
    logy=True,
    # The kernels return frames with different columns, so do not compare them.
    equality_check=False,
)