"""Fit and plot the passenger/revenue relation before and after 2020.

Reads plot colours from ``color/config.json`` and rows from
``data/passenger.csv``, fits a straight line to each of the two row ranges
(first six rows, remaining rows), prints slope / intercept / R-squared for
each fit, and saves the resulting figure under ``result/``.
"""
import csv
import json

import numpy as np
from scipy.optimize import curve_fit

CONFIG_PATH = 'color/config.json'
DATA_PATH = 'data/passenger.csv'
OUTPUT_PATH = 'result/passenger-and-revenue-relation.png'

# Rows [0, SPLIT_INDEX) are labelled "before 2020"; the rest "2020 and after".
SPLIT_INDEX = 6


def linear(x, k, b):
    """Return ``k * x + b`` (works element-wise on NumPy arrays)."""
    return k * x + b


def fit_linear(x, y):
    """Fit ``y = k * x + b`` by least squares.

    Args:
        x: 1-D array of independent values.
        y: 1-D array of dependent values, same length as ``x``.

    Returns:
        Tuple ``(k, b, r_squared)`` where ``r_squared`` is
        ``1 - SS_res / SS_tot`` for the fitted line.
    """
    k, b = curve_fit(linear, x, y)[0]
    residuals = y - linear(x, k, b)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    r_squared = 1 - (ss_res / ss_tot)
    return k, b, r_squared


def load_colors(path=CONFIG_PATH):
    """Return the ``"color"`` entry of the JSON config at ``path``."""
    # A context manager guarantees the handle is closed after parsing.
    with open(path, 'r') as f:
        return json.load(f)["color"]


def load_passenger_data(path=DATA_PATH):
    """Load the CSV at ``path`` and return ``(x, y)`` arrays for plotting.

    The first row is treated as a header and skipped; every remaining cell
    must parse as a float. ``x`` is the sum of columns 1, 2 and 3 (plotted
    as "Total Passengers") and ``y`` is column 5 (plotted as
    "Total Revenue").
    """
    # newline='' is what the csv module expects for files it reads.
    with open(path, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader)  # skip the header row
        rows = list(reader)
    columns = np.array(rows).astype(float).T
    x = columns[1] + columns[2] + columns[3]
    y = columns[5]
    return x, y


def main():
    """Run both fits, print their parameters, then save and show the plot."""
    # Imported lazily so the numeric helpers above stay importable in
    # environments that have no plotting backend installed.
    import matplotlib.pyplot as plt

    colors = load_colors()
    x, y = load_passenger_data()
    line_x = np.arange(0, 2000000, 1000)

    # (printed name, legend label, row range, scatter colour, line colour)
    segments = (
        ("Before 2020", 'before 2020',
         slice(None, SPLIT_INDEX), colors[0], colors[2]),
        ("2020 and after", '2020 and after',
         slice(SPLIT_INDEX, None), colors[1], colors[3]),
    )
    for name, label, rows, point_color, line_color in segments:
        xs, ys = x[rows], y[rows]
        plt.scatter(xs, ys, color=point_color)
        k, b, r_squared = fit_linear(xs, ys)
        print("%s: k:%f, b:%f, R-squared:%f" % (name, k, b, r_squared))
        plt.plot(line_x, linear(line_x, k, b), color=line_color, label=label)

    plt.xlabel('Total Passengers')
    plt.ylabel('Total Revenue')
    plt.legend()
    plt.title('Passenger-Revenue Relation')
    plt.savefig(OUTPUT_PATH, dpi=1024)
    plt.show()


if __name__ == "__main__":
    main()