import numpy as np
import pandas as pd

# Demo: derive quartile bin edges from a column with np.nanpercentile and
# then bucket that same column with pd.cut.
SEPARATOR = "*" * 100

left = pd.DataFrame(
    {
        'A': [1., np.nan, 3., 4.],
        'B': [1., 2., 3., 4.],
    }
)

# Five evenly spaced percentile ranks: 0, 25, 50, 75, 100.
percent_points = np.linspace(0, 100, 5)

# Values of column B at those ranks (NaNs ignored); reused below as bin edges.
d = np.nanpercentile(left["B"], percent_points)

print(percent_points)
print(d)
print(SEPARATOR)
print(type(d))
print(SEPARATOR)

# pd.cut builds right-closed intervals by default, so the leftmost edge is
# exclusive: the minimum of B equals that edge, lands in no bin and is
# reported as NaN. Passing include_lowest=True would keep it in the first bin.
print(pd.cut(left["B"], bins=d))
print(SEPARATOR)

# labels=False yields the positional bin index instead of the interval label.
print(pd.cut(left["B"], bins=d, labels=False))

 

Output (row 0 is NaN because the bins are right-closed and the lowest edge, 1.0, is excluded):

[  0.  25.  50.  75. 100.]
[1.   1.75 2.5  3.25 4.  ]
****************************************************************************************************
<class 'numpy.ndarray'>
****************************************************************************************************
0            NaN
1    (1.75, 2.5]
2    (2.5, 3.25]
3    (3.25, 4.0]
Name: B, dtype: category
Categories (4, interval[float64]): [(1.0, 1.75] < (1.75, 2.5] < (2.5, 3.25] < (3.25, 4.0]]
****************************************************************************************************
0    NaN
1    1.0
2    2.0
3    3.0
Name: B, dtype: float64

分类:

技术点:

相关文章: