numba 并行 njit 编译不适用于 np.isnan()答案

【问题标题】：numba parallel njit compilation not working with np.isnan()numba 并行 njit 编译不适用于 np.isnan()
【发布时间】：2020-10-22 00:28:38
【问题描述】：

我需要 njit 编译这个函数（并行化）。但它会在调用 np.isnan() 的行中引发以下错误。支持 np.isnan() ，因此：我错过了什么？

import numpy as np, pandas as pd 
from numba import njit, prange, jit

Ts=pd.DataFrame({"end":pd.to_datetime(np.asarray([ '2019-08-20 10:23:00', '2019-08-20 10:24:00', '2019-08-20 10:25:00', '2019-08-20 10:26:00', '2019-08-20 10:27:00', '2019-08-20 10:28:00', '2019-08-20 10:29:00', '2019-08-20 10:30:00', '2019-08-20 10:31:00', '2019-08-20 10:32:00', '2019-08-20 10:33:00', '2019-08-20 10:34:00', '2019-08-20 10:35:00', '2019-08-20 10:36:00', '2019-08-20 10:37:00', '2019-08-20 10:38:00', '2019-08-20 10:39:00', '2019-08-20 10:40:00', '2019-08-20 10:41:00', '2019-08-20 10:42:00', '2019-08-20 10:43:00', '2019-08-20 10:44:00', '2019-08-20 10:45:00', '2019-08-20 10:46:00', '2019-08-20 10:47:00', '2019-08-20 10:48:00', '2019-08-20 10:49:00', '2019-08-20 10:50:00', '2019-08-20 10:51:00', '2019-08-20 10:52:00', '2019-08-20 10:53:00', '2019-08-20 10:54:00', '2019-08-20 10:55:00', '2019-08-20 10:56:00', '2019-08-20 10:57:00', '2019-08-20 10:58:00', '2019-08-20 10:59:00', '2019-08-20 11:00:00', '2019-08-20 11:01:00', '2019-08-20 11:02:00', '2019-08-20 11:03:00', '2019-08-20 11:04:00', '2019-08-20 11:05:00', '2019-08-20 11:06:00', '2019-08-20 11:07:00', '2019-08-20 11:08:00', '2019-08-20 11:09:00', '2019-08-20 11:10:00', '2019-08-20 11:11:00', '2019-08-20 11:12:00', '2019-08-20 11:13:00', '2019-08-20 11:14:00', '2019-08-20 11:15:00', '2019-08-20 11:16:00', '2019-08-20 11:17:00', '2019-08-20 11:18:00', '2019-08-20 11:19:00', '2019-08-20 11:20:00', '2019-08-20 11:21:00', '2019-08-20 11:22:00', '2019-08-20 11:23:00', '2019-08-20 11:24:00', '2019-08-20 11:25:00', '2019-08-20 11:26:00', '2019-08-20 11:27:00', '2019-08-20 11:28:00', '2019-08-20 11:29:00', '2019-08-20 11:30:00', '2019-08-20 11:31:00', '2019-08-20 11:32:00', '2019-08-20 11:33:00', '2019-08-20 11:34:00', '2019-08-20 11:35:00', '2019-08-20 11:36:00', '2019-08-20 11:37:00', '2019-08-20 11:38:00', '2019-08-20 11:39:00', '2019-08-20 11:40:00', '2019-08-20 11:41:00', '2019-08-20 11:42:00', '2019-08-20 11:43:00', '2019-08-20 11:44:00', '2019-08-20 11:45:00', '2019-08-20 11:46:00', '2019-08-20 11:47:00', '2019-08-20 11:48:00', '2019-08-20 11:49:00', '2019-08-20 11:50:00', '2019-08-20 11:51:00', '2019-08-20 11:52:00', '2019-08-20 11:53:00', '2019-08-20 11:54:00', '2019-08-20 11:55:00', '2019-08-20 11:56:00', '2019-08-20 11:57:00', '2019-08-20 11:58:00', '2019-08-20 11:59:00', '2019-08-20 12:00:00']))}
                                              , index = pd.DatetimeIndex(['2019-08-20 10:21:00', '2019-08-20 10:22:00', '2019-08-20 10:23:00', '2019-08-20 10:24:00', '2019-08-20 10:25:00', '2019-08-20 10:26:00', '2019-08-20 10:27:00', '2019-08-20 10:28:00', '2019-08-20 10:29:00', '2019-08-20 10:30:00', '2019-08-20 10:31:00', '2019-08-20 10:32:00', '2019-08-20 10:33:00', '2019-08-20 10:34:00', '2019-08-20 10:35:00', '2019-08-20 10:36:00', '2019-08-20 10:37:00', '2019-08-20 10:38:00', '2019-08-20 10:39:00', '2019-08-20 10:40:00', '2019-08-20 10:41:00', '2019-08-20 10:42:00', '2019-08-20 10:43:00', '2019-08-20 10:44:00', '2019-08-20 10:45:00', '2019-08-20 10:46:00', '2019-08-20 10:47:00', '2019-08-20 10:48:00', '2019-08-20 10:49:00', '2019-08-20 10:50:00', '2019-08-20 10:51:00', '2019-08-20 10:52:00', '2019-08-20 10:53:00', '2019-08-20 10:54:00', '2019-08-20 10:55:00', '2019-08-20 10:56:00', '2019-08-20 10:57:00', '2019-08-20 10:58:00', '2019-08-20 10:59:00', '2019-08-20 11:00:00', '2019-08-20 11:01:00', '2019-08-20 11:02:00', '2019-08-20 11:03:00', '2019-08-20 11:04:00', '2019-08-20 11:05:00', '2019-08-20 11:06:00', '2019-08-20 11:07:00', '2019-08-20 11:08:00', '2019-08-20 11:09:00', '2019-08-20 11:10:00', '2019-08-20 11:11:00', '2019-08-20 11:12:00', '2019-08-20 11:13:00', '2019-08-20 11:14:00', '2019-08-20 11:15:00', '2019-08-20 11:16:00', '2019-08-20 11:17:00', '2019-08-20 11:18:00', '2019-08-20 11:19:00', '2019-08-20 11:20:00', '2019-08-20 11:21:00', '2019-08-20 11:22:00', '2019-08-20 11:23:00', '2019-08-20 11:24:00', '2019-08-20 11:25:00', '2019-08-20 11:26:00', '2019-08-20 11:27:00', '2019-08-20 11:28:00', '2019-08-20 11:29:00', '2019-08-20 11:30:00', '2019-08-20 11:31:00', '2019-08-20 11:32:00', '2019-08-20 11:33:00', '2019-08-20 11:34:00', '2019-08-20 11:35:00', '2019-08-20 11:36:00', '2019-08-20 11:37:00', '2019-08-20 11:38:00', '2019-08-20 11:39:00', '2019-08-20 11:40:00', '2019-08-20 11:41:00', '2019-08-20 11:42:00', '2019-08-20 11:43:00', '2019-08-20 11:44:00', '2019-08-20 11:45:00', '2019-08-20 11:46:00', '2019-08-20 11:47:00', '2019-08-20 11:48:00', '2019-08-20 11:49:00', '2019-08-20 11:50:00', '2019-08-20 11:51:00', '2019-08-20 11:52:00', '2019-08-20 11:53:00', '2019-08-20 11:54:00', '2019-08-20 11:55:00', '2019-08-20 11:56:00', '2019-08-20 11:57:00', '2019-08-20 11:58:00']))
raw_valuesS_ix = pd.DatetimeIndex(['2019-08-20 10:21:00', '2019-08-20 10:22:00', '2019-08-20 10:23:00', '2019-08-20 10:24:00', '2019-08-20 10:25:00', '2019-08-20 10:26:00', '2019-08-20 10:27:00', '2019-08-20 10:28:00', '2019-08-20 10:29:00', '2019-08-20 10:30:00', '2019-08-20 10:31:00', '2019-08-20 10:32:00', '2019-08-20 10:33:00', '2019-08-20 10:34:00', '2019-08-20 10:35:00', '2019-08-20 10:36:00', '2019-08-20 10:37:00', '2019-08-20 10:38:00', '2019-08-20 10:39:00', '2019-08-20 10:40:00', '2019-08-20 10:41:00', '2019-08-20 10:42:00', '2019-08-20 10:43:00', '2019-08-20 10:44:00', '2019-08-20 10:45:00', '2019-08-20 10:46:00', '2019-08-20 10:47:00', '2019-08-20 10:48:00', '2019-08-20 10:49:00', '2019-08-20 10:50:00', '2019-08-20 10:51:00', '2019-08-20 10:52:00', '2019-08-20 10:53:00', '2019-08-20 10:54:00', '2019-08-20 10:55:00', '2019-08-20 10:56:00', '2019-08-20 10:57:00', '2019-08-20 10:58:00', '2019-08-20 10:59:00', '2019-08-20 11:00:00', '2019-08-20 11:01:00', '2019-08-20 11:02:00', '2019-08-20 11:03:00', '2019-08-20 11:04:00', '2019-08-20 11:05:00', '2019-08-20 11:06:00', '2019-08-20 11:07:00', '2019-08-20 11:08:00', '2019-08-20 11:09:00', '2019-08-20 11:10:00', '2019-08-20 11:11:00', '2019-08-20 11:12:00', '2019-08-20 11:13:00', '2019-08-20 11:14:00', '2019-08-20 11:15:00', '2019-08-20 11:16:00', '2019-08-20 11:17:00', '2019-08-20 11:18:00', '2019-08-20 11:19:00', '2019-08-20 11:20:00', '2019-08-20 11:21:00', '2019-08-20 11:22:00', '2019-08-20 11:23:00', '2019-08-20 11:24:00', '2019-08-20 11:25:00', '2019-08-20 11:26:00', '2019-08-20 11:27:00', '2019-08-20 11:28:00', '2019-08-20 11:29:00', '2019-08-20 11:30:00', '2019-08-20 11:31:00', '2019-08-20 11:32:00', '2019-08-20 11:33:00', '2019-08-20 11:34:00', '2019-08-20 11:35:00', '2019-08-20 11:36:00', '2019-08-20 11:37:00', '2019-08-20 11:38:00', '2019-08-20 11:39:00', '2019-08-20 11:40:00', '2019-08-20 11:41:00', '2019-08-20 11:42:00', '2019-08-20 11:43:00', '2019-08-20 11:44:00', '2019-08-20 11:45:00', '2019-08-20 11:46:00', '2019-08-20 11:47:00', '2019-08-20 11:48:00', '2019-08-20 11:49:00', '2019-08-20 11:50:00', '2019-08-20 11:51:00', '2019-08-20 11:52:00', '2019-08-20 11:53:00', '2019-08-20 11:54:00', '2019-08-20 11:55:00', '2019-08-20 11:56:00', '2019-08-20 11:57:00', '2019-08-20 11:58:00', '2019-08-20 11:59:00', '2019-08-20 12:00:00'], dtype='datetime64[ns]', freq=None)
raw_valuesS = pd.DataFrame( {"A":np.random.uniform(low=0.5, high=13.3, size=(len(raw_valuesS_ix),)), "B":np.random.uniform(low=0.5, high=13.3, size=(len(raw_valuesS_ix),)), "C":np.random.uniform(low=0.5, high=13.3, size=(len(raw_valuesS_ix),)),"D":np.random.uniform(low=0.5, high=13.3, size=(len(raw_valuesS_ix),)),"E":np.random.uniform(low=0.5, high=13.3, size=(len(raw_valuesS_ix),))}, index = raw_valuesS_ix)
raw_valuesS_arr = np.asarray(raw_valuesS,                                    dtype=np.float64)
t0_locs = np.asarray(np.searchsorted(raw_valuesS.index, Ts.index),       dtype=np.int64)
t1_locs = np.asarray(np.searchsorted(raw_valuesS.index, np.asarray(Ts)), dtype=np.int64)
result  = np.empty(raw_valuesS_arr.shape[0],                                 dtype=np.int64); result.fill(-1.)

@njit(parallel=True)
def drop_FullNaN_Values_t1_Settings(raw_valuesS_arr, t0_locs, t1_locs, result, amount_of_sets):                 
    for i in prange(amount_of_sets): 

        a = raw_valuesS_arr[t0_locs[i]: t1_locs[i]+1, 0]
        #b = np.isnan(a)# <---

        if np.isnan(raw_valuesS_arr[t0_locs[i]: t1_locs[i]+1, 0]).all()==True:
            result[i] = i 
    return result

result = drop_FullNaN_Values_t1_Settings(raw_valuesS_arr, t0_locs, t1_locs, result, np.int64(len(Ts)))


  File "c:\users\...\site-packages\numba\core\dispatcher.py", line 418, in _compile_for_args
    error_rewrite(e, 'unsupported_error')

  File "c:\users\...\site-packages\numba\core\dispatcher.py", line 358, in error_rewrite
    reraise(type(e), e, None)

  File "c:\users\...\site-packages\numba\core\utils.py", line 80, in reraise
    raise value.with_traceback(tb)

UnsupportedError: Unsupported op-code encountered: arrayexpr(expr=(<ufunc 'isnan'>, [Var($a.286, main.py:107)]), ty=array(bool, 1d, F))

【问题讨论】：

标签： python pandas numpy parallel-processing numba

【解决方案1】：

这就是我克服同样问题的方法。

似乎 nan 存储为特定数字，对我来说这是“-9223372036854775808”。（我通过运行调试器并查看内存中存储的内容找到了这个数字）。然后我检查了我想确定它是否是 nan 的数字是否等于这个数字。

if number_to_check == -9223372036854775808:
    print("This number is nan")

【讨论】：