Practice 2
tf07_california.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# from sklearn.datasets import load_boston -> no longer provided due to ethical concerns
from sklearn.datasets import fetch_california_housing
# 1. Data
# datasets = load_boston()
datasets = fetch_california_housing()
x = datasets.data
y = datasets.target
print(datasets.feature_names)
# ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
# print(datasets.DESCR)
# # :Attribute Information:
# - MedInc      median income in block group
# - HouseAge    median house age in block group
# - AveRooms    average number of rooms per household
# - AveBedrms   average number of bedrooms per household
# - Population  block group population
# - AveOccup    average number of household members
# - Latitude    block group latitude
# - Longitude   block group longitude
print(x.shape) # (20640, 8)
print(y.shape) # (20640,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=128, shuffle=True
)
print(x_train.shape) # (14447, 8)
print(y_train.shape) # (14447,)
# 2. Model construction
model = Sequential()
model.add(Dense(14, input_dim=8))
model.add(Dense(64))
model.add(Dense(200))
model.add(Dense(256))
model.add(Dense(200))
model.add(Dense(1))
# 3. Compile, train
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=500, batch_size=300)
# 4. Evaluate, predict
loss = model.evaluate(x_test, y_test)
print('loss : ', loss)
y_predict = model.predict(x_test)
r2 = r2_score(y_test, y_predict)
print('r2score : ', r2)
# Results
# loss : 0.6773682832717896
# r2score : 0.4803216875343258
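For reference, r2_score is the coefficient of determination, R² = 1 - SS_res / SS_tot: 1.0 means a perfect fit and 0.0 means no better than always predicting the mean. A minimal numpy sketch with made-up values:

import numpy as np
y_true = np.array([3.0, 2.5, 4.0, 5.0])         # hypothetical targets
y_pred = np.array([2.8, 2.7, 3.9, 4.6])         # hypothetical predictions
ss_res = np.sum((y_true - y_pred) ** 2)         # residual sum of squares
ss_tot = np.sum((y_true - y_true.mean()) ** 2)  # total sum of squares
print(1 - ss_res / ss_tot)  # same value sklearn's r2_score would return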
tf08_california_activation.py
# [Practice] Improve performance by using an activation function
# activation='relu'
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# from sklearn.datasets import load_boston -> no longer provided due to ethical concerns
from sklearn.datasets import fetch_california_housing
# 1. Data
# datasets = load_boston()
datasets = fetch_california_housing()
x = datasets.data
y = datasets.target
print(datasets.feature_names)
# ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
# print(datasets.DESCR)
# # :Attribute Information:
# - MedInc      median income in block group
# - HouseAge    median house age in block group
# - AveRooms    average number of rooms per household
# - AveBedrms   average number of bedrooms per household
# - Population  block group population
# - AveOccup    average number of household members
# - Latitude    block group latitude
# - Longitude   block group longitude
print(x.shape) # (20640, 8)
print(y.shape) # (20640,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=128, shuffle=True
)
print(x_train.shape) # (14447, 8)
print(y_train.shape) # (14447,)
# 2. Model construction
model = Sequential()
model.add(Dense(32, activation='linear', input_dim=8))
model.add(Dense(64, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(34, activation='relu'))
model.add(Dense(1, activation='linear')) # In a regression model, the input and output layers keep the 'linear' activation
# 3. Compile, train
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=1000, batch_size=256)
# 4. Evaluate, predict
loss = model.evaluate(x_test, y_test)
print('loss : ', loss)
y_predict = model.predict(x_test)
r2 = r2_score(y_test, y_predict)
print('r2score : ', r2)
# Results
# r2score : 0.6456720942721941
# Using activation='relu' in every hidden layer improves performance.
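relu is simply relu(x) = max(0, x): negative activations are clipped to zero while positive values pass through unchanged, which lets a stack of Dense layers model nonlinear relationships instead of collapsing into one big linear map. A quick numpy illustration:

import numpy as np
x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
print(np.maximum(0, x))  # [0.  0.  0.  0.5 2. ] -> negatives clipped to zero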
tf09_cancer_sigmoid.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score
from sklearn.datasets import load_breast_cancer
import time
# 1. Data
datasets = load_breast_cancer()
print(datasets.DESCR)
print(datasets.feature_names)
x = datasets.data
y = datasets.target
print(x.shape, y.shape) # (569, 30) (569,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
# 2. Model construction
model = Sequential()
model.add(Dense(100, input_dim=30, activation='linear'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='sigmoid')) # For binary classification, the output layer's activation must be 'sigmoid'.
# 3. Compile, train
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse', 'accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=100, batch_size=200, verbose=3) # verbose=0,1,2 are the documented settings (0=silent, 1=progress bar, 2=one line per epoch)
end_time = time.time() - start_time
# 4. Evaluate, predict
loss = model.evaluate(x_test, y_test)
y_predict = model.predict(x_test)
# [Practice] Print the accuracy_score!!!
# Round y_predict. There are two ways:
# 1. y_predict = np.where(y_predict > 0.5, 1, 0)
# 2. y_predict = np.round(y_predict)
y_predict = np.where(y_predict > 0.5, 1, 0)
acc = accuracy_score(y_test, y_predict)
print('loss : ', loss)
print('acc : ', acc)
print('time spent : ', end_time)
# Results
# loss : [0.16830424964427948, 0.050364427268505096, 0.9239766001701355]
# acc : 0.9239766081871345
# time spent : 1.110365390777588
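The two rounding methods give identical labels for sigmoid outputs, since rounding a probability is the same as thresholding it at 0.5. A quick check with hypothetical sigmoid outputs:

import numpy as np
p = np.array([[0.1], [0.49], [0.51], [0.97]])  # hypothetical sigmoid outputs
print(np.where(p > 0.5, 1, 0).ravel())         # [0 0 1 1]
print(np.round(p).astype(int).ravel())         # [0 0 1 1]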
tf10_iris_onehotencoding.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import time
# 1. Data preparation
datasets = load_iris()
print(datasets.DESCR)
print(datasets.feature_names)
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
x = datasets.data
# Same as above: x = datasets['data']
y = datasets.target
print(x.shape, y.shape) # (150, 4) (150,)
### one_hot_encoding ###
from keras.utils import to_categorical
y = to_categorical(y)
print(y)
print(y.shape) # (150, 3)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (105, 4) (105, 3)
print(x_test.shape, y_test.shape) # (45, 4) (45, 3)
print(y_test)
# 2. Model construction
model = Sequential()
model.add(Dense(100, input_dim=4))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax')) # Multiclass classification uses softmax in the output layer
# 3. Compile, train
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)
# 4. Evaluate, predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent : 3.028395891189575
# loss : 0.039043981581926346
# acc : 0.9777777791023254
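to_categorical is plain one-hot encoding: integer class k becomes a length-3 vector with a 1 in position k. Indexing an identity matrix reproduces it, as this small sketch shows:

import numpy as np
y = np.array([0, 1, 2, 1])  # integer class labels
print(np.eye(3)[y])         # same as to_categorical(y, num_classes=3)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]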
tf10_iris_softmax.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import time
# 1. Data preparation
datasets = load_iris()
print(datasets.DESCR)
print(datasets.feature_names)
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
x = datasets.data
# Same as above: x = datasets['data']
y = datasets.target
print(x.shape, y.shape) # (150, 4) (150,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) #(105, 4) (105,)
print(x_test.shape, y_test.shape) # (45, 4) (45,)
print(y_test)
# 2. Model construction
model = Sequential()
model.add(Dense(100, input_dim=4))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax')) # Multiclass classification uses softmax in the output layer
# 3. Compile, train
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)
# 4. Evaluate, predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent : 2.917778730392456
# loss : 0.018691886216402054
# acc : 1.0 -> an accuracy of 1.0 is often a sign of overfitting.
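This version skips the one-hot encoding step: sparse_categorical_crossentropy accepts integer labels directly and computes the same value that categorical_crossentropy computes on the one-hot version. A minimal check, assuming a TF2 backend where the loss functions return eager tensors:

import numpy as np
from keras.losses import categorical_crossentropy, sparse_categorical_crossentropy
probs = np.array([[0.7, 0.2, 0.1]])  # hypothetical softmax output
print(sparse_categorical_crossentropy(np.array([0]), probs).numpy())      # integer label
print(categorical_crossentropy(np.array([[1., 0., 0.]]), probs).numpy())  # one-hot label; both equal -log(0.7)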
tf11_wine_onehotencoding.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
import time
# 1. Data preparation
datasets = load_wine()
print(datasets.DESCR)
print(datasets.feature_names)
x = datasets.data
y = datasets.target
print(x.shape, y.shape) # (178, 13) (178,)
### one_hot_encoding ###
from keras.utils import to_categorical
y = to_categorical(y)
print(y)
print(y.shape) # (178, 3)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (124, 13) (124, 3)
print(x_test.shape, y_test.shape) # (54, 13) (54, 3)
# 2. Model construction
model = Sequential()
model.add(Dense(100, input_dim=13))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))
# 3. Compile, train
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)
# 4. Evaluate, predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent : 2.9441092014312744
# loss : 0.23444879055023193
# acc : 0.9259259104728699
tf11_wine_softmax.py
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
import time
# 1. Data preparation
datasets = load_wine()
print(datasets.DESCR) # Check the classes: the number of classes equals the number of outputs.
print(datasets.feature_names)
x = datasets.data
y = datasets.target
print(x.shape, y.shape) # (178, 13) (178,)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (124, 13) (124,)
print(x_test.shape, y_test.shape) # (54, 13) (54,)
# 2. Model construction
model = Sequential()
model.add(Dense(64, input_dim=13))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))
# 3. Compile, train
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=200, batch_size=200)
end_time = time.time() - start_time
print('time spent : ', end_time)
# 4. Evaluate, predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent : 1.2617859840393066
# loss : 0.23711225390434265
# acc : 0.9259259104728699
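accuracy_score is imported but never used in these multiclass scripts. To apply it to softmax outputs, first collapse the per-class probabilities to a predicted label with np.argmax. A sketch continuing from the script above (y_test here holds integer labels, since no one-hot encoding was applied):

y_predict = model.predict(x_test)             # (54, 3) class probabilities
y_pred_labels = np.argmax(y_predict, axis=1)  # most probable class per sample
print('accuracy_score : ', accuracy_score(y_test, y_pred_labels))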