#coding=utf-8
import numpy as np
import pandas as pd
import datetime
#导入数据
df_ferrara = pd.read_csv('WeatherData/ferrara_270615.csv')
df_milano = pd.read_csv('WeatherData/milano_270615.csv')
df_mantova = pd.read_csv('WeatherData/mantova_270615.csv')
df_ravenna = pd.read_csv('WeatherData/ravenna_270615.csv')
df_torino = pd.read_csv('WeatherData/torino_270615.csv')
df_asti = pd.read_csv('WeatherData/asti_270615.csv')
df_bologna = pd.read_csv('WeatherData/bologna_270615.csv')
df_piacenza = pd.read_csv('WeatherData/piacenza_270615.csv')
df_cesena = pd.read_csv('WeatherData/cesena_270615.csv')
df_faenza = pd.read_csv('WeatherData/faenza_270615.csv')
#导入库
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from dateutil import parser
# 取出我们要分析的温度和日期数据
y1 = df_milano['temp']
x1 = df_milano['day']
# print(df_milano)
# 把日期数据转换成 datetime 的格式
day_milano = [parser.parse(x) for x in x1]
print(day_milano)
# 调用 subplot 函数, fig 是图像对象,ax 是坐标轴对象
fig, ax = plt.subplots()
# 调整x轴坐标刻度,使其旋转70度,方便查看
plt.xticks(rotation=70)
# 设定时间的格式
hours = mdates.DateFormatter('%H:%M')
print(hours)
# 画出图像,day_milano是X轴数据,y1是Y轴数据,‘r’代表的是'red' 红色
plt.plot(day_milano ,y1, 'r')
plt.show()

# 读取温度和日期数据
y1 = df_ravenna['temp']
x1 = df_ravenna['day']
y2 = df_faenza['temp']
x2 = df_faenza['day']
y3 = df_cesena['temp']
x3 = df_cesena['day']
y4 = df_milano['temp']
x4 = df_milano['day']
y5 = df_asti['temp']
x5 = df_asti['day']
y6 = df_torino['temp']
x6 = df_torino['day']
# 把日期从 string 类型转化为标准的 datetime 类型
day_ravenna = [parser.parse(x) for x in x1]
day_faenza = [parser.parse(x) for x in x2]
day_cesena = [parser.parse(x) for x in x3]
day_milano = [parser.parse(x) for x in x4]
day_asti = [parser.parse(x) for x in x5]
day_torino = [parser.parse(x) for x in x6]
# 调用 subplots() 函数,重新定义 fig, ax 变量
fig, ax = plt.subplots()
plt.xticks(rotation=70)
hours = mdates.DateFormatter('%H:%M')
ax.xaxis.set_major_formatter(hours)
#这里需要画出三根线,所以需要三组参数, 'g'代表'green'
plt.plot(day_ravenna,y1,'r',day_faenza,y2,'r',day_cesena,y3,'r')
plt.plot(day_milano,y4,'g',day_asti,y5,'g',day_torino,y6,'g')
plt.show()

# dist 是一个装城市距离海边距离的列表
dist = [df_ravenna['dist'][0],
df_cesena['dist'][0],
df_faenza['dist'][0],
df_ferrara['dist'][0],
df_bologna['dist'][0],
df_mantova['dist'][0],
df_piacenza['dist'][0],
df_milano['dist'][0],
df_asti['dist'][0],
df_torino['dist'][0]
]
# temp_max 是一个存放每个城市最高温度的列表
temp_max = [df_ravenna['temp'].max(),
df_cesena['temp'].max(),
df_faenza['temp'].max(),
df_ferrara['temp'].max(),
df_bologna['temp'].max(),
df_mantova['temp'].max(),
df_piacenza['temp'].max(),
df_milano['temp'].max(),
df_asti['temp'].max(),
df_torino['temp'].max()
]
# temp_min 是一个存放每个城市最低温度的列表
temp_min = [df_ravenna['temp'].min(),
df_cesena['temp'].min(),
df_faenza['temp'].min(),
df_ferrara['temp'].min(),
df_bologna['temp'].min(),
df_mantova['temp'].min(),
df_piacenza['temp'].min(),
df_milano['temp'].min(),
df_asti['temp'].min(),
df_torino['temp'].min()
]
print(dist)
print(temp_max)
print(temp_min)
fig, ax = plt.subplots()
plt.plot(dist,temp_max,'ro')
plt.show()

下面我们来线性拟合,运行的时间会比较长。
from sklearn.svm import SVR
# dist1是靠近海的城市集合,dist2是远离海洋的城市集合
dist1 = dist[0:5]
dist2 = dist[5:10]
# 改变列表的结构,dist1现在是5个列表的集合
# 之后我们会看到 nbumpy 中 reshape() 函数也有同样的作用
dist1 = [[x] for x in dist1]
dist2 = [[x] for x in dist2]
# temp_max1 是 dist1 中城市的对应最高温度
temp_max1 = temp_max[0:5]
# temp_max2 是 dist2 中城市的对应最高温度
temp_max2 = temp_max[5:10]
# 我们调用SVR函数,在参数中规定了使用线性的拟合函数
# 并且把 C 设为1000来尽量拟合数据(因为不需要精确预测不用担心过拟合)
svr_lin1 = SVR(kernel='linear', C=1e3)
svr_lin2 = SVR(kernel='linear', C=1e3)
# 加入数据,进行拟合(这一步可能会跑很久,大概10多分钟,休息一下:) )
svr_lin1.fit(dist1, temp_max1)
svr_lin2.fit(dist2, temp_max2)
# 关于 reshape 函数,转化形状
xp1 = np.arange(10,100,10).reshape((9,1))
xp2 = np.arange(50,400,50).reshape((7,1))
yp1 = svr_lin1.predict(xp1)
yp2 = svr_lin2.predict(xp2)
# # 画出图像
plt.plot(xp1, yp1, c='b', label='Strong sea effect')
plt.plot(xp2, yp2, c='g', label='Light sea effect')
# 限制了 x 轴的取值范围
ax.set_xlim(0,400)
plt.show()

# 从图上可以见到,离海 60 公里以内,气温上升速度很快,
# 从 28 度陡升至 31 度,随后增速渐趋缓和(如果还继续增长的话),
# 更长的距离才会有小幅上升。这两种趋势可分别用两条直线来表示,
# 直线的表达式为: y = ax + b 其中 a 为斜率,b 为截距。
print(svr_lin1.coef_) #斜率
print(svr_lin1.intercept_) # 截距
print(svr_lin2.coef_)
print(svr_lin2.intercept_)
# 现在将这两条直线的交点作为受海洋影响和不受海洋影响的区域的分界点,
# 或者至少是海洋影响较弱的分界点。
from scipy.optimize import fsolve
# 定义了第一条拟合直线
def line1(x):
a1 = svr_lin1.coef_[0][0]
b1 = svr_lin1.intercept_[0]
return a1*x + b1
# 定义了第二条拟合直线
def line2(x):
a2 = svr_lin2.coef_[0][0]
b2 = svr_lin2.intercept_[0]
return a2*x + b2
# 定义了找到两条直线的交点的 x 坐标的函数
def findIntersection(fun1,fun2,x0):
return fsolve(lambda x : fun1(x) - fun2(x),x0)
result = findIntersection(line1,line2,0.0)
print("[x,y] = [ %d , %d ]" % (result,line1(result)))
# x = [0,10,20, ..., 300]
x = np.linspace(0,300,31)
plt.plot(x,line1(x),x,line2(x),result,line1(result),'ro')
plt.show()
