In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
# Diabetes patient data: load scikit-learn's bundled diabetes dataset.
# `diabetes.data` (features) and `diabetes.target` (values to predict) are
# both used by the cells below.
diabetes = datasets.load_diabetes()
In [20]:
# Inspect the feature matrix: its shape, its contents, and its number of rows.
diabetes.data.shape , diabetes.data , len(diabetes.data)
Out[20]:
((442, 10), array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226, 0.01990842, -0.01764613], [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338, -0.06832974, -0.09220405], [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226, 0.00286377, -0.02593034], ..., [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952, -0.04687948, 0.01549073], [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962, 0.04452837, -0.02593034], [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338, -0.00421986, 0.00306441]]), 442)
In [21]:
# Keep only the feature stored in column 2. Slicing with 2:3 (rather than
# indexing with 2) keeps the result two-dimensional, one column per sample,
# which is the layout the estimator is fitted on below.
diabetes_X = diabetes.data[:, 2:3]
In [22]:
diabetes_X.shape , diabetes_X
Out[22]:
((442, 1), array([[ 0.06169621], [-0.05147406], [ 0.04445121], [-0.01159501], [-0.03638469], [-0.04069594], [-0.04716281], [-0.00189471], [ 0.06169621], [ 0.03906215], [-0.08380842], [ 0.01750591], [-0.02884001], [-0.00189471], [-0.02560657], [-0.01806189], [ 0.04229559], [ 0.01211685], [-0.0105172 ], [-0.01806189], [-0.05686312], [-0.02237314], [-0.00405033], [ 0.06061839], [ 0.03582872], [-0.01267283], [-0.07734155], [ 0.05954058], [-0.02129532], [-0.00620595], [ 0.04445121], [-0.06548562], [ 0.12528712], [-0.05039625], [-0.06332999], [-0.03099563], [ 0.02289497], [ 0.01103904], [ 0.07139652], [ 0.01427248], [-0.00836158], [-0.06764124], [-0.0105172 ], [-0.02345095], [ 0.06816308], [-0.03530688], [-0.01159501], [-0.0730303 ], [-0.04177375], [ 0.01427248], [-0.00728377], [ 0.0164281 ], [-0.00943939], [-0.01590626], [ 0.0250506 ], [-0.04931844], [ 0.04121778], [-0.06332999], [-0.06440781], [-0.02560657], [-0.00405033], [ 0.00457217], [-0.00728377], [-0.0374625 ], [-0.02560657], [-0.02452876], [-0.01806189], [-0.01482845], [-0.02991782], [-0.046085 ], [-0.06979687], [ 0.03367309], [-0.00405033], [-0.02021751], [ 0.00241654], [-0.03099563], [ 0.02828403], [-0.03638469], [-0.05794093], [-0.0374625 ], [ 0.01211685], [-0.02237314], [-0.03530688], [ 0.00996123], [-0.03961813], [ 0.07139652], [-0.07518593], [-0.00620595], [-0.04069594], [-0.04824063], [-0.02560657], [ 0.0519959 ], [ 0.00457217], [-0.06440781], [-0.01698407], [-0.05794093], [ 0.00996123], [ 0.08864151], [-0.00512814], [-0.06440781], [ 0.01750591], [-0.04500719], [ 0.02828403], [ 0.04121778], [ 0.06492964], [-0.03207344], [-0.07626374], [ 0.04984027], [ 0.04552903], [-0.00943939], [-0.03207344], [ 0.00457217], [ 0.02073935], [ 0.01427248], [ 0.11019775], [ 0.00133873], [ 0.05846277], [-0.02129532], [-0.0105172 ], [-0.04716281], [ 0.00457217], [ 0.01750591], [ 0.08109682], [ 0.0347509 ], [ 0.02397278], [-0.00836158], [-0.06117437], [-0.00189471], [-0.06225218], [ 0.0164281 ], [ 0.09618619], [-0.06979687], 
[-0.02129532], [-0.05362969], [ 0.0433734 ], [ 0.05630715], [-0.0816528 ], [ 0.04984027], [ 0.11127556], [ 0.06169621], [ 0.01427248], [ 0.04768465], [ 0.01211685], [ 0.00564998], [ 0.04660684], [ 0.12852056], [ 0.05954058], [ 0.09295276], [ 0.01535029], [-0.00512814], [ 0.0703187 ], [-0.00405033], [-0.00081689], [-0.04392938], [ 0.02073935], [ 0.06061839], [-0.0105172 ], [-0.03315126], [-0.06548562], [ 0.0433734 ], [-0.06225218], [ 0.06385183], [ 0.03043966], [ 0.07247433], [-0.0191397 ], [-0.06656343], [-0.06009656], [ 0.06924089], [ 0.05954058], [-0.02668438], [-0.02021751], [-0.046085 ], [ 0.07139652], [-0.07949718], [ 0.00996123], [-0.03854032], [ 0.01966154], [ 0.02720622], [-0.00836158], [-0.01590626], [ 0.00457217], [-0.04285156], [ 0.00564998], [-0.03530688], [ 0.02397278], [-0.01806189], [ 0.04229559], [-0.0547075 ], [-0.00297252], [-0.06656343], [-0.01267283], [-0.04177375], [-0.03099563], [-0.00512814], [-0.05901875], [ 0.0250506 ], [-0.046085 ], [ 0.00349435], [ 0.05415152], [-0.04500719], [-0.05794093], [-0.05578531], [ 0.00133873], [ 0.03043966], [ 0.00672779], [ 0.04660684], [ 0.02612841], [ 0.04552903], [ 0.04013997], [-0.01806189], [ 0.01427248], [ 0.03690653], [ 0.00349435], [-0.07087468], [-0.03315126], [ 0.09403057], [ 0.03582872], [ 0.03151747], [-0.06548562], [-0.04177375], [-0.03961813], [-0.03854032], [-0.02560657], [-0.02345095], [-0.06656343], [ 0.03259528], [-0.046085 ], [-0.02991782], [-0.01267283], [-0.01590626], [ 0.07139652], [-0.03099563], [ 0.00026092], [ 0.03690653], [ 0.03906215], [-0.01482845], [ 0.00672779], [-0.06871905], [-0.00943939], [ 0.01966154], [ 0.07462995], [-0.00836158], [-0.02345095], [-0.046085 ], [ 0.05415152], [-0.03530688], [-0.03207344], [-0.0816528 ], [ 0.04768465], [ 0.06061839], [ 0.05630715], [ 0.09834182], [ 0.05954058], [ 0.03367309], [ 0.05630715], [-0.06548562], [ 0.16085492], [-0.05578531], [-0.02452876], [-0.03638469], [-0.00836158], [-0.04177375], [ 0.12744274], [-0.07734155], [ 0.02828403], 
[-0.02560657], [-0.06225218], [-0.00081689], [ 0.08864151], [-0.03207344], [ 0.03043966], [ 0.00888341], [ 0.00672779], [-0.02021751], [-0.02452876], [-0.01159501], [ 0.02612841], [-0.05901875], [-0.03638469], [-0.02452876], [ 0.01858372], [-0.0902753 ], [-0.00512814], [-0.05255187], [-0.02237314], [-0.02021751], [-0.0547075 ], [-0.00620595], [-0.01698407], [ 0.05522933], [ 0.07678558], [ 0.01858372], [-0.02237314], [ 0.09295276], [-0.03099563], [ 0.03906215], [-0.06117437], [-0.00836158], [-0.0374625 ], [-0.01375064], [ 0.07355214], [-0.02452876], [ 0.03367309], [ 0.0347509 ], [-0.03854032], [-0.03961813], [-0.00189471], [-0.03099563], [-0.046085 ], [ 0.00133873], [ 0.06492964], [ 0.04013997], [-0.02345095], [ 0.05307371], [ 0.04013997], [-0.02021751], [ 0.01427248], [-0.03422907], [ 0.00672779], [ 0.00457217], [ 0.03043966], [ 0.0519959 ], [ 0.06169621], [-0.00728377], [ 0.00564998], [ 0.05415152], [-0.00836158], [ 0.114509 ], [ 0.06708527], [-0.05578531], [ 0.03043966], [-0.02560657], [ 0.10480869], [-0.00620595], [-0.04716281], [-0.04824063], [ 0.08540807], [-0.01267283], [-0.03315126], [-0.00728377], [-0.01375064], [ 0.05954058], [ 0.02181716], [ 0.01858372], [-0.01159501], [-0.00297252], [ 0.01750591], [-0.02991782], [-0.02021751], [-0.05794093], [ 0.06061839], [-0.04069594], [-0.07195249], [-0.05578531], [ 0.04552903], [-0.00943939], [-0.03315126], [ 0.04984027], [-0.08488624], [ 0.00564998], [ 0.02073935], [-0.00728377], [ 0.10480869], [-0.02452876], [-0.00620595], [-0.03854032], [ 0.13714305], [ 0.17055523], [ 0.00241654], [ 0.03798434], [-0.05794093], [-0.00943939], [-0.02345095], [-0.0105172 ], [-0.03422907], [-0.00297252], [ 0.06816308], [ 0.00996123], [ 0.00241654], [-0.03854032], [ 0.02612841], [-0.08919748], [ 0.06061839], [-0.02884001], [-0.02991782], [-0.0191397 ], [-0.04069594], [ 0.01535029], [-0.02452876], [ 0.00133873], [ 0.06924089], [-0.06979687], [-0.02991782], [-0.046085 ], [ 0.01858372], [ 0.00133873], [-0.03099563], [-0.00405033], [ 
0.01535029], [ 0.02289497], [ 0.04552903], [-0.04500719], [-0.03315126], [ 0.097264 ], [ 0.05415152], [ 0.12313149], [-0.08057499], [ 0.09295276], [-0.05039625], [-0.01159501], [-0.0277622 ], [ 0.05846277], [ 0.08540807], [-0.00081689], [ 0.00672779], [ 0.00888341], [ 0.08001901], [ 0.07139652], [-0.02452876], [-0.0547075 ], [-0.03638469], [ 0.0164281 ], [ 0.07786339], [-0.03961813], [ 0.01103904], [-0.04069594], [-0.03422907], [ 0.00564998], [ 0.08864151], [-0.03315126], [-0.05686312], [-0.03099563], [ 0.05522933], [-0.06009656], [ 0.00133873], [-0.02345095], [-0.07410811], [ 0.01966154], [-0.01590626], [-0.01590626], [ 0.03906215], [-0.0730303 ]]))
In [29]:
# Split the data into a training set and a test set: the last 20 samples are
# held out for testing and everything before them is used for training.
# Features and targets are sliced identically so the rows stay aligned.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]
In [ ]:
# The data is now prepared as shown above.
# Next, let's build a model and train it. How do we do that?
In [38]:
# Create the model: a scikit-learn LinearRegression estimator (not yet fitted).
regr = linear_model.LinearRegression()
In [40]:
# Train the model on the training split (features, then targets).
regr.fit(diabetes_X_train, diabetes_y_train)
Out[40]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
In [43]:
# How do we make predictions?
In [44]:
# Predict the target for the 20 held-out test samples with the fitted model.
diabetes_y_pred = regr.predict(diabetes_X_test)
# Bare last expression so the notebook displays the predicted values.
diabetes_y_pred
Out[44]:
array([225.9732401 , 115.74763374, 163.27610621, 114.73638965, 120.80385422, 158.21988574, 236.08568105, 121.81509832, 99.56772822, 123.83758651, 204.73711411, 96.53399594, 154.17490936, 130.91629517, 83.3878227 , 171.36605897, 137.99500384, 137.99500384, 189.56845268, 84.3990668 ])
In [45]:
# Print the fitted coefficients; the model was trained on a single feature,
# so `regr.coef_` holds exactly one value.
print('Coefficients: \n', regr.coef_)
Coefficients: [938.23786125]
In [47]:
# How do we compute R^2?
# It can be done as follows.
In [48]:
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))
Variance score: 0.47
In [49]:
# Plot the held-out observations (black dots) against the fitted regression
# line (blue). The explicit fig/ax interface is used, and the figure carries a
# title, axis labels and a legend so it can be understood on its own.
fig, ax = plt.subplots()
ax.scatter(diabetes_X_test, diabetes_y_test, color='black', label='Test data')
ax.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3,
        label='Linear regression fit')
ax.set_title('Linear regression on the diabetes test set')
ax.set_xlabel('Feature (diabetes.data column 2)')
ax.set_ylabel('Target (diabetes.target)')
ax.legend()
# Tick marks stay hidden, exactly as in the original figure.
ax.set_xticks(())
ax.set_yticks(())
plt.show()
In [ ]:
'데이터분석 > 파이썬' 카테고리의 다른 글
의사결정나무 (0) | 2021.08.08 |
---|---|
의사결정 나무 연습문제 (0) | 2021.08.08 |