大数据技术-题库

更新时间: 试题数量: 购买人数: 提供作者:

有效期: 个月

章节介绍: 共有个章节

收藏
搜索
题库预览
利用模型进行数据判断
#导入Python库
import matplotlib.pyplot as plt
____
plt.rcParams['axes.unicode_minus'] = False
____
from statsmodels.tsa.stattools import adfuller as ADF
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.api import qqplot
import sys
import numpy as np
import warnings
warnings.filterwarnings('ignore')
#查看数据源
____
print(df)
#设置行索引
df['日期'] = df['日期'].astype(str)
df['日期'] = pd.to_datetime(df['日期'])
____
df.index = df['日期']
y_data = df['资金流入']
#平稳性检验
fig = plt.figure(figsize=[18, 9])
y_data.plot(color='#e98e95', title='资金流入序列图')
plt.xlabel("日期")
plt.ylabel("资金流入")
plt.legend(loc="best")
____
fig.savefig('数据时序图.png')
adf_res = ADF(y_data)
print(adf_res)
critical_value_1percent = adf_res[4]['1%']
if adf_res[0] < critical_value_1percent:
    d = 0
    y_data_diff = y_data
else:
    d = 2
    y_data_diff = y_data.diff(1).diff(1).dropna()
#白噪声检验
acr = acorr_ljungbox(y_data_diff, lags=1)
____
print(acr)
#模型构建
if acr['lb_pvalue'].values[0] < 0.05:
    bicValue_Dif = sys.maxsize
    p_Dif = 0
    q_Dif = 0
    p_max = 6
    q_max = 6
    for p in range(0, p_max):
        for q in range(0, q_max):
            try:
                model = ARIMA(y_data, order=(p, d, q)).fit()
                bicValue = model.bic
                if bicValue <= bicValue_Dif:
                    bicValue_Dif = bicValue
                    p_Dif = p
                    q_Dif = q
            except:
                continue
    print('d=', d)
    print('p=', p_Dif)
    print('q=', q_Dif)
    model = ARIMA(y_data, order=(p_Dif, d, q_Dif))
    ar_model = model.fit()
    resid_data = ar_model.resid
    residuals = pd.DataFrame(resid_data)
    print(residuals.head())
    fig = plt.figure(figsize=[16, 14])
    ____
    residuals.plot(ax=ax1)
    plt.title('残差时序图')
    ax2 = plt.subplot(312)
    residuals.plot(kind='kde', ax=ax2)
    ____
    ax3 = plt.subplot(313)
    qqplot(residuals, line='q', ax=ax3, fit=True)
    plt.title('QQ图')
    plt.savefig('残差检验结果.png')
    plt.show()
    sim_data = ar_model.predict(1, len(y_data))
    df_sim = pd.DataFrame(sim_data.values, columns=['模拟值'])
    df_sim.index = y_data.index
    df_concat = pd.concat([y_data, df_sim], axis=1)
    print(df_concat.head())
    plt.figure(figsize=[18, 9])
    df_concat[['资金流入', '模拟值']].plot(figsize=(12, 8))
    ____
    plt.ylabel("资金流入")
    plt.legend(loc="best")
    plt.show()
    plt.savefig('模型拟合图.png')
    pre_days = 7
    pre_data = ar_model.forecast(pre_days)
    pre_data.index = np.arange(0, len(pre_data))
    print(pre_data)
    plt.figure(figsize=[18, 9])
    pre_data.plot(color='#e98e95', title='资金流入预测', label="资金流入预测值", linewidth=2, marker='o')
    plt.ylabel("资金流入数据")
    plt.legend(loc="best")
    plt.show()
    ____
    pre_df = pd.DataFrame({'预测数据': pre_data.values})
    pre_df['预测数据'] = pre_df['预测数据'].astype(int)
    pre_df.to_excel('模型预测结果.xlsx', index=False, encoding='utf-8-sig')
    print(pre_df)
else:
    print('该数据是白噪声,数据没有规律,算法运行结束!')
(含图)(含图)