【发布时间】:2019-02-14 13:14:28
【问题描述】:
我正在尝试比较 pandas 数据框中的两列。我想要的是从正文中删除用户名
raw_data = {
'user_name': ['name1 name2', 'nameX nameY '],
'text_body': ['this is the text were i should delete name1 and name2',
'this is the text were i should delete nameX and nameY']}
df_a = pd.DataFrame(raw_data, columns = ['user_name', 'text_body'])
df_a
我将两个 pd 列分隔为标记,以遍历我应该删除单词的第二列。
def sent_to_words(sentences):
# function to transforms a sentence into tokens
tokens = [nltk.word_tokenize(sent) for sent in sentences]
return tokens
def remover_user_name(text_body,user_name):
#sent_to_words is a function that transforms the raw data into small tokens
user_name_token = sent_to_words(user_name)
for row in dataset.itertuples(index=True, name='Pandas'):
for user in user_name_token:
dataset['user_clean'] = data.apply(lambda x: data.str.strip(user) for user in user_name_token)
return dataset['user_clean'].tolist()
data = dataset['Textemsg'].apply(lambda x: clean_data(x))
user_name = to_lower(dataset['user_name'])
dataaa = remover_user_name(data,user_name)
print(dataaa)
这是我得到的错误:
TypeError Traceback (most recent call last)
<ipython-input-104-9b39af043e09> in <module>()
1 data = dataset['Textemsg'].apply(lambda x: clean_data(x))
2 user_name = to_lower(dataset['user_name'])
----> 3 dataaa = remover_user_name(data,user_name)
4 print(dataaa)
<ipython-input-103-0a5a8bce7b52> in remover_user_name(data, user_name)
3 for row in dataset.itertuples(index=True, name='Pandas'):
4 for user in user_name_token:
----> 5 dataset['user_clean'] = data.apply(lambda x: data.str.strip(user) for user in user_name_token)
6 return dataset['user_clean'].tolist()
/opt/conda/lib/python3.6/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
3192 else:
3193 values = self.astype(object).values
-> 3194 mapped = lib.map_infer(values, f, convert=convert_dtype)
3195
3196 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer()
TypeError: 'generator' object is not callable
【问题讨论】:
标签: python string pandas string-comparison