【发布时间】:2022-01-14 15:51:17
【问题描述】:
我收到以下错误
ValueError: feature_names mismatch: ['Have_IP', 'Have_At', 'URL_Length', 'URL_Depth', 'Redirection', 'https_Domain', 'TinyURL', 'Prefix/Suffix', 'DNS_Record', 'Web_Traffic' , 'Domain_Age', 'Domain_End', 'iFrame', 'Mouse_Over', 'Right_Click', 'Web_Forwards'] ['0']
输入数据中的预期 https_Domain、DNS_Record、Domain_Age、URL_Depth、Have_At、iFrame、Have_IP、TinyURL、重定向、Web_Traffic、Web_Forwards、Mouse_Over、前缀/后缀、Right_Click、URL_Length、Domain_End 训练数据没有以下字段:0
self.label_2 = QtWidgets.QLabel(self.centralwidget)
self.label_2.setGeometry(QtCore.QRect(320, 260, 191, 51))
font = QtGui.QFont()
font.setPointSize(16)
self.label_2.setFont(font)
self.label_2.setStyleSheet("color: rgb(255, 0, 0);")
self.label_2.setObjectName("label_2")
self.textEdit = QtWidgets.QTextEdit(self.centralwidget)
self.textEdit.setGeometry(QtCore.QRect(250, 140, 281, 31))
self.textEdit.setObjectName("textEdit")
self.pushButton = QtWidgets.QPushButton(self.centralwidget)
self.pushButton.setGeometry(QtCore.QRect(360, 210, 80, 25))
self.pushButton.setObjectName("pushButton")
self.pushButton.clicked.connect(self.UrlfeatureExtraction)
self.label_3 = QtWidgets.QLabel(self.centralwidget)
self.label_3.setGeometry(QtCore.QRect(120, 150, 91, 16))
self.label_3.setObjectName("label_3")
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 30))
self.menubar.setObjectName("menubar")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
self.label.setText(_translate("MainWindow", " PHISHING WEBSITE DETECTION"))
self.pushButton.setText(_translate("MainWindow", "DETECT"))
self.label_2.setText(_translate("MainWindow", "ANSWER IS:-"))
self.label_3.setText(_translate("MainWindow", "ENTER URL"))
# 1.Domain of the URL (Domain)
def getDomain(self,url):
domain = urlparse(url).netloc
if re.match(r"^www.", domain):
domain = domain.replace("www.", "")
return domain
# 2.Checks for IP address in URL (Have_IP)
def havingIP(self,url):
try:
ipaddress.ip_address(url)
ip = 1
except:
ip = 0
return ip
# 3.Checks the presence of @ in URL (Have_At)
def haveAtSign(self,url):
if "@" in url:
at = 1
else:
at = 0
return at
# 4.Finding the length of URL and categorizing (URL_Length)
def getLength(self,url):
if len(url) < 54:
length = 0
else:
length = 1
return length
# 5.Gives number of '/' in URL (URL_Depth)
def getDepth(self,url):
s = urlparse(url).path.split('/')
depth = 0
for j in range(len(s)):
if len(s[j]) != 0:
depth = depth + 1
return depth
# 6.Checking for redirection '//' in the url (Redirection)
def redirection(self,url):
pos = url.rfind('//')
if pos > 6:
if pos > 7:
return 1
else:
return 0
else:
return 0
# 7.Existence of “HTTPS” Token in the Domain Part of the URL (https_Domain)
def httpDomain(self,url):
domain = urlparse(url).netloc
if 'https' in domain:
return 1
else:
return 0
# listing shortening services
shortening_services = r"bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|" \
r"yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|" \
r"short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|" \
r"doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|db\.tt|" \
r"qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|q\.gs|is\.gd|" \
r"po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|x\.co|" \
r"prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|" \
r"tr\.im|link\.zip\.net"
# 8. Checking for Shortening Services in URL (Tiny_URL)
def tinyURL(self,url):
match = re.search(self.shortening_services, url)
if match:
return 1
else:
return 0
# 9.Checking for Prefix or Suffix Separated by (-) in the Domain (Prefix/Suffix)
def prefixSuffix(self,url):
if '-' in urlparse(url).netloc:
return 1 # phishing
else:
return 0 # legitimate
def get_ipython(self):
pass
self.get_ipython().system('pip install python-whois')
# 12.Web traffic (Web_Traffic)
def web_traffic(self,url):
try:
# Filling the whitespaces in the URL if any
url = urllib.parse.quote(url)
rank = \
BeautifulSoup(urllib.request.urlopen("http://data.alexa.com/data?cli=10&dat=s&url=" + url).read(),
"xml").find(
"REACH")['RANK']
rank = int(rank)
except TypeError:
return 1
if rank < 100000:
return 1
else:
return 0
# 13.Survival time of domain: The difference between termination time and creation time (Domain_Age)
def domainAge(self,domain_name):
creation_date = domain_name.creation_date
expiration_date = domain_name.expiration_date
if (isinstance(creation_date, str) or isinstance(expiration_date, str)):
try:
creation_date = datetime.strptime(creation_date, '%Y-%m-%d')
expiration_date = datetime.strptime(expiration_date, "%Y-%m-%d")
except:
return 1
if ((expiration_date is None) or (creation_date is None)):
return 1
elif ((type(expiration_date) is list) or (type(creation_date) is list)):
return 1
else:
ageofdomain = abs((expiration_date - creation_date).days)
if ((ageofdomain / 30) < 6):
age = 1
else:
age = 0
return age
# 14.End time of domain: The difference between termination time and current time (Domain_End)
def domainEnd(self,domain_name):
expiration_date = domain_name.expiration_date
if isinstance(expiration_date, str):
try:
expiration_date = datetime.strptime(expiration_date, "%Y-%m-%d")
except:
return 1
if (expiration_date is None):
return 1
elif (type(expiration_date) is list):
return 1
else:
today = datetime.now()
end = abs((expiration_date - today).days)
if ((end / 30) < 6):
end = 0
else:
end = 1
return end
# 15. IFrame Redirection (iFrame)
def iframe(self,response):
if response == "":
return 1
else:
if re.findall(r"[<iframe>|<frameBorder>]", response.text):
return 0
else:
return 1
# 16.Checks the effect of mouse over on status bar (Mouse_Over)
def mouseOver(self,response):
if response == "":
return 1
else:
if re.findall("<script>.+onmouseover.+</script>", response.text):
return 1
else:
return 0
# 17.Checks the status of the right click attribute (Right_Click)
def rightClick(self,response):
if response == "":
return 1
else:
if re.findall(r"event.button ?== ?2", response.text):
return 0
else:
return 1
# 18.Checks the number of forwardings (Web_Forwards)
def forwarding(self,response):
if response == "":
return 1
else:
if len(response.history) <= 2:
return 0
else:
return 1
# Function to extract features
# There are 17 features extracted from the dataset
def featureExtractions(self,url):
self.getDomain(url)
features=[]
features = [self.havingIP(url), self.haveAtSign(url),self.getLength(url),self.getDepth(url),self.redirection(url),
self.httpDomain(url),self.tinyURL(url),self.prefixSuffix(url)]
# Address bar based features (9)
# Domain based features (4)
dns = 0
try:
domain_name = whois.whois(urlparse(url).netloc)
except:
dns = 1
features.append(dns)
features.append(self.web_traffic(url))
features.append(1 if dns == 1 else self.domainAge(domain_name))
features.append(1 if dns == 1 else self.domainEnd(domain_name))
# HTML & Javascript based features (4)
try:
response = requests.get(url)
except:
response = ""
features.append(self.iframe(response))
features.append(self.mouseOver(response))
features.append(self.rightClick(response))
features.append(self.forwarding(response))
# label=0
# features.append(label)
book = xlsxwriter.Workbook('Extracted_data.xlsx')
sheet1 = book.add_worksheet()
feature_names = ['Have_IP', 'Have_At', 'URL_Length', 'URL_Depth','Redirection',
'https_Domain', 'TinyURL','Prefix/Suffix', 'DNS_Record','Web_Traffic', 'Domain_Age', 'Domain_End',
'iFrame', 'Mouse_Over','Right_Click', 'Web_Forwards']
row = 0
column = 0
for item in feature_names :
# write operation perform
sheet1.write(row, column, item)
# incrementing the value of row by one with each iterations.
column += 1
row = 1
column = 0
for item in features :
# write operation perform
sheet1.write(row, column, item)
# incrementing the value of row by one with each iterations.
column += 1
book.close()
return features
def UrlfeatureExtraction(self):
url = self.textEdit.toPlainText()
data=[]
label=0
data=self.featureExtractions(url)
# data.pop()
XGmodel = pickle.load(open('XGBoostClassifier.pkl', 'rb'))
cols_when_model_builds = XGmodel.get_booster().feature_names
print(data)
dfa=pd.DataFrame(data)
y_pred = XGmodel.predict(dfa)
predictions = [round(value) for value in y_pred]
# evaluate predictions
if predictions == 0:
value = "Legitimate"
self.label.setText(value)
else:
value = "Phishing"
self.label.setText(value);
如果 name == "main": 导入系统 应用程序 = QtWidgets.QApplication(sys.argv) MainWindow = QtWidgets.QMainWindow() ui = Ui_MainWindow() ui.setupUi(主窗口) MainWindow.show() sys.exit(app.exec_())
我的 Xtrain 和 Xtest 包含 16 个特征,我已将其传递给模型的输入数据。我什至匹配了它的序列和没有特征。 我无法理解这个错误是什么...... 请帮我解决这个问题
【问题讨论】:
-
请阅读ML标签的description。
-
请确保您是正确的formatting code。另请注意,您不应该编辑 pyuic 文件;在有关using Designer 的官方指南中阅读如何正确使用这些文件。
-
您的错误消息何时结束而代码开始?请整理您的代码和错误信息
-
请修剪您的代码,以便更容易找到您的问题。请按照以下指南创建minimal reproducible example。
标签: python dataframe machine-learning pyqt5 xgboost