"""Bin a DataFrame column into intervals whose edges are its own percentiles."""
import numpy as np
import pandas as pd

# Visual divider printed between the sections of the output.
SEPARATOR = "*" * 100

left = pd.DataFrame(
    {
        "A": [1.0, np.nan, 3.0, 4.0],
        "B": [1.0, 2.0, 3.0, 4.0],
    }
)

# Five evenly spaced percentile ranks between 0 and 100 inclusive.
percent_ranks = np.linspace(0, 100, 5)

# Values of column B at those ranks; they serve as the bin edges below.
d = np.nanpercentile(left["B"], percent_ranks)

print(percent_ranks)
print(d)
print(SEPARATOR)
print(type(d))
print(SEPARATOR)

# Interval labels per row. NOTE: with pd.cut's default settings the bins are
# open on the left, so the row equal to the first edge (the column minimum)
# is not assigned to any bin and comes out as NaN.
print(pd.cut(left["B"], d))
print(SEPARATOR)

# Same binning, but reporting integer bin positions instead of intervals.
print(pd.cut(left["B"], d, labels=False))
[ 0. 25. 50. 75. 100.]
[1. 1.75 2.5 3.25 4. ]
****************************************************************************************************
<class 'numpy.ndarray'>
****************************************************************************************************
0 NaN
1 (1.75, 2.5]
2 (2.5, 3.25]
3 (3.25, 4.0]
Name: B, dtype: category
Categories (4, interval[float64]): [(1.0, 1.75] < (1.75, 2.5] < (2.5, 3.25] < (3.25, 4.0]]
****************************************************************************************************
0 NaN
1 1.0
2 2.0
3 3.0
Name: B, dtype: float64