【发布时间】:2018-05-03 00:53:18
【问题描述】:
我有一个用 python 编写的深度学习代码(Anaconda3,Ubuntu 16.04)。它基本上基于经过训练的模型对给定视频进行广告检测,如果视频是广告则必须返回(我们假设它只是一个单镜头视频)。我现在不可用的同事写了这个。
原始文件在某处(用于多镜头)有一个循环遍历镜头列表,现在假设视频是单镜头,则删除该循环。但是看起来有些数组大小搞砸了,因此出现了错误。如何解决问题?
我不熟悉 python 和深度学习,应该是一般的 python 编程问题,而不是语义,因为它之前运行良好。
这是错误:
File "/Ad_module_textfast_stream.py", line 36, in label_prediction
pred_labels= clf_trained.predict( mfcc_feat.reshape(-1, 200) )
ValueError: cannot reshape array of size 8640 into shape (200)
所以它应该在调用video_audio_extractor(video_name) 时运行。这是代码。现在final_label_list 必须包含视频或广告。而且我猜因为它只是一个单一的镜头,它必须只包含一个元素。
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time, cv2, librosa
import skvideo.io
import numpy as np
import tensorflow as tf
import subprocess, os, glob
from sklearn.externals import joblib
#################################### Loading the Dataset
def label_prediction( mfcc_list, num_frames= 3):
num_shots= len(mfcc_list)
pred_labels_list= list()
final_label_list= list()
if 2>1:
clf_trained = joblib.load('trainsvm_mfcc.pkl')
#predicted_train_labels= clf_trained.predict( mfcc_list)
for j in range(num_shots):
mfcc_feat = mfcc_list[j]
num_frames= 1
mfcc_feat= ( mfcc_feat- np.mean(mfcc_feat) ) / ( np.std(mfcc_feat)+ 1e-6 )
#### now access operation you wanna run
#pred_labels= clf_trained.predict( mfcc_feat)
pred_labels= clf_trained.predict( mfcc_feat.reshape(-1, 200) )
final_label= 0 ## ads:1, and video:0
if pred_labels> 0:
final_label= 1
pred_labels_list.append(pred_labels)
final_label_list.append(final_label)
################ post-processing
final_label_list[0]= 0 ### video starts with natural content
final_label_list[len(final_label_list)-1]= 0 ## last shot
for kk in range(2,len(final_label_list)-2): ### one video between many ads, most likely to be ads
if final_label_list[kk]==0 and final_label_list[kk-2]==1 and final_label_list[kk-1]==1 and final_label_list[kk+1]==1 and final_label_list[kk+2]==1:
final_label_list[kk]= 1
return final_label_list, pred_labels_list
def video_audio_extractor( video_name):
cur_video= skvideo.io.vread(video_name)
metadata = skvideo.io.ffprobe(video_name)
vid_info= metadata["video"]
items = list(vid_info.items())
avg_fps_info= items[22][1]
avg_fps= int(avg_fps_info[0:2])
cur_num_frame= cur_video.shape[0]
cur_audio, cur_sr= librosa.load(video_name)
mfcc_list= list()
cur_audioshot_mfcc= librosa.feature.mfcc( y= cur_audio, sr= cur_sr, n_mfcc=20)
cur_audioshot_mfcc_1d= np.reshape( cur_audioshot_mfcc, [cur_audioshot_mfcc.shape[0]*cur_audioshot_mfcc.shape[1],])
mfcc_list.append(cur_audioshot_mfcc_1d)
final_label_list, pred_labels_list= label_prediction( mfcc_list, num_frames= 3)
return mfcc_list, avg_fps, final_label_list
以下是包含 for 循环的原始 video_audio_extractor 函数:
def video_audio_extractor( video_name):
cur_video= skvideo.io.vread(video_name)
metadata = skvideo.io.ffprobe(video_name)
vid_info= metadata["video"]
items = list(vid_info.items())
avg_fps_info= items[22][1]
avg_fps= int(avg_fps_info[0:2])
cur_num_frame= cur_video.shape[0]
cur_audio, cur_sr= librosa.load(video_name)
cur_shot_name= 'video_shots.txt'
#cur_shot_name= cur_video_name[0:-4]+'_shots.txt'
line = list(open(cur_shot_name, 'r'))
mfcc_list= list()
for shot_ind in range(len(line)):
cur_line= line[ shot_ind]
cur_line_list= cur_line.split()
first_frame= int( cur_line_list[0] )
last_frame = int( cur_line_list[1] )
cur_audioshot_first_ind= int( np.floor(first_frame*len(cur_audio)/cur_num_frame ) )
cur_audioshot_last_ind = int( np.floor(last_frame *len(cur_audio)/cur_num_frame ) )
cur_audioshot= cur_audio[cur_audioshot_first_ind:cur_audioshot_last_ind]
new_rate= 5000*cur_sr/len(cur_audioshot)
cur_audioshot_resampled = librosa.resample(cur_audioshot, cur_sr, new_rate)
cur_audioshot_mfcc= librosa.feature.mfcc(y=cur_audioshot_resampled, sr= new_rate, n_mfcc=20)
cur_audioshot_mfcc_1d= np.reshape( cur_audioshot_mfcc, [cur_audioshot_mfcc.shape[0]*cur_audioshot_mfcc.shape[1],])
mfcc_list.append(cur_audioshot_mfcc_1d)
return mfcc_list, line, avg_fps
【问题讨论】:
标签: python numpy tensorflow scikit-learn librosa