
[NAVER Cloud Camp] 2023.5.9 AI (2) - Practice 2

_꼬마돌 2023. 5. 9. 17:43

Practice 2

tf07_california.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# from sklearn.datasets import load_boston -> no longer provided due to ethical concerns
from sklearn.datasets import fetch_california_housing

# 1. Data
# datasets = load_boston()
datasets = fetch_california_housing()
x = datasets.data
y = datasets.target

print(datasets.feature_names)
# ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
# print(datasets.DESCR)
# # :Attribute Information:
# - MedInc: median income in block group
# - HouseAge: median house age in block group
# - AveRooms: average number of rooms per household
# - AveBedrms: average number of bedrooms per household
# - Population: block group population
# - AveOccup: average number of household members
# - Latitude: block group latitude
# - Longitude: block group longitude

print(x.shape)  # (20640, 8)
print(y.shape)  # (20640,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=128, shuffle=True
)
print(x_train.shape)    # (14447, 8)
print(y_train.shape)    # (14447,)

# 2. Model
model = Sequential()
model.add(Dense(14, input_dim=8))
model.add(Dense(64))
model.add(Dense(200))
model.add(Dense(256))
model.add(Dense(200))
model.add(Dense(1))


# 3. Compile and train
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=500, batch_size=300)

# 4. Evaluate and predict
loss = model.evaluate(x_test, y_test)
print('loss : ', loss)

y_predict = model.predict(x_test)

r2 = r2_score(y_test, y_predict)
print('r2score : ', r2)

# Result
# loss :  0.6773682832717896
# r2score :  0.4803216875343258
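
For reference, r2_score measures how much better the model does than a baseline that always predicts the mean of y_test: r2 = 1 - SS_res / SS_tot. A minimal NumPy sketch of that formula, using made-up toy arrays rather than the model output above:

import numpy as np

# made-up stand-ins for y_test / y_predict
y_true = np.array([2.0, 1.5, 3.0, 0.5])
y_pred = np.array([1.8, 1.4, 2.5, 0.9])

ss_res = np.sum((y_true - y_pred) ** 2)           # residual sum of squares
ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)  # total sum of squares
print(1 - ss_res / ss_tot)                        # matches r2_score(y_true, y_pred)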

 

tf08_california_activation.py

# [Practice] improve performance by using an activation function
# activation='relu'

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
# from sklearn.datasets import load_boston -> no longer provided due to ethical concerns
from sklearn.datasets import fetch_california_housing

# 1. Data
# datasets = load_boston()
datasets = fetch_california_housing()
x = datasets.data
y = datasets.target

print(datasets.feature_names)
# ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
# print(datasets.DESCR)
# # :Attribute Information:
# - MedInc: median income in block group
# - HouseAge: median house age in block group
# - AveRooms: average number of rooms per household
# - AveBedrms: average number of bedrooms per household
# - Population: block group population
# - AveOccup: average number of household members
# - Latitude: block group latitude
# - Longitude: block group longitude

print(x.shape)  # (20640, 8)
print(y.shape)  # (20640,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=128, shuffle=True
)
print(x_train.shape)    # (14447, 8)
print(y_train.shape)    # (14447,)

# 2. Model
model = Sequential()
model.add(Dense(32, activation='linear', input_dim=8))
model.add(Dense(64, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(34, activation='relu'))
model.add(Dense(1, activation='linear'))    # in a regression model, the input and output layers use the 'linear' activation

# 3. Compile and train
model.compile(loss='mse', optimizer='adam')
model.fit(x_train, y_train, epochs=1000, batch_size=256)

# 4. Evaluate and predict
loss = model.evaluate(x_test, y_test)
print('loss : ', loss)

y_predict = model.predict(x_test)

r2 = r2_score(y_test, y_predict)
print('r2score : ', r2)

# Result
# r2score : 0.6456720942721941
# Using activation='relu' on every hidden layer improves performance (r2score 0.48 -> 0.65).
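
relu is simply max(0, x): positive values pass through unchanged and negatives become zero, which gives the stacked Dense layers their non-linearity. An illustrative NumPy equivalent:

import numpy as np

def relu(x):
    # element-wise max(0, x), the same function activation='relu' applies
    return np.maximum(0.0, x)

print(relu(np.array([-2.0, 0.0, 3.0])))  # [0. 0. 3.]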

 

tf09_cancer_sigmoid.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score
from sklearn.datasets import load_breast_cancer
import time

# 1. Data
datasets = load_breast_cancer()
print(datasets.DESCR)
print(datasets.feature_names)

x = datasets.data
y = datasets.target 
print(x.shape, y.shape) # (569, 30) (569,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)

# 2. Model
model = Sequential()
model.add(Dense(100, input_dim=30, activation='linear'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(1, activation='sigmoid'))   # binary classification always requires 'sigmoid' on the output layer

# 3. Compile and train
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse', 'accuracy'])

start_time = time.time()
model.fit(x_train, y_train, epochs=100, batch_size=200, verbose=3)  # verbose=0,1,2,3 controls how training progress is displayed
end_time = time.time() - start_time

# 4. Evaluate and predict
loss = model.evaluate(x_test, y_test)
y_predict = model.predict(x_test)


# [Practice] print the accuracy_score!!!
# Round y_predict first. There are two ways:
# 1. y_predict = np.where(y_predict > 0.5, 1, 0)
# 2. y_predict = np.round(y_predict)

y_predict = np.where(y_predict > 0.5, 1, 0)
acc = accuracy_score(y_test, y_predict)
print('loss : ', loss)
print('acc : ', acc)    

print('time spent : ', end_time)
# acc :  0.9239766081871345
# loss :  [0.16830424964427948, 0.050364427268505096, 0.9239766001701355]
# time spent :  1.110365390777588
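
Note that model.evaluate returns a list here because compile received metrics=['mse', 'accuracy']: the elements are [binary_crossentropy, mse, accuracy]. As for the two rounding methods, both map sigmoid probabilities to 0/1 labels; a quick check with made-up probabilities shows they agree:

import numpy as np

p = np.array([[0.1], [0.49], [0.51], [0.97]])  # made-up sigmoid outputs
print(np.where(p > 0.5, 1, 0).ravel())         # [0 0 1 1]
print(np.round(p).astype(int).ravel())         # [0 0 1 1]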

 

tf10_iris_onehotencoding.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import time

# 1. Data
datasets = load_iris()
print(datasets.DESCR)
print(datasets.feature_names)
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

x = datasets.data
# same meaning as above: x = datasets['data']
y = datasets.target
print(x.shape, y.shape) # (150, 4) (150,)

### one_hot_encoding ###
from keras.utils import to_categorical
y = to_categorical(y)
print(y)
print(y.shape)  # (150, 3)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (105, 4) (105, 3)
print(x_test.shape, y_test.shape)   # (45, 4) (45, 3)
print(y_test)


# 2. Model
model = Sequential()
model.add(Dense(100, input_dim=4))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))   # softmax for multiclass classification

# 3. Compile and train
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)

# 4. Evaluate and predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent :  3.028395891189575
# loss : 0.039043981581926346
# acc :  0.9777777791023254
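
Concretely, to_categorical turns each integer label into a one-hot row vector, which is the format categorical_crossentropy expects. A minimal illustration:

from keras.utils import to_categorical

print(to_categorical([0, 1, 2, 1]))
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]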

 

tf10_iris_softmax.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import time

# 1. Data
datasets = load_iris()
print(datasets.DESCR)
print(datasets.feature_names)
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

x = datasets.data
# same meaning as above: x = datasets['data']
y = datasets.target
print(x.shape, y.shape) # (150, 4) (150,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) #(105, 4) (105,)
print(x_test.shape, y_test.shape)   # (45, 4) (45,)
print(y_test)

# 2. Model
model = Sequential()
model.add(Dense(100, input_dim=4))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))   # softmax for multiclass classification

# 3. Compile and train
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)

# 4. Evaluate and predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)
# time spent :  2.917778730392456
# loss :  0.018691886216402054
# acc :  1.0   (an accuracy of 1.0 likely indicates overfitting)
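
The only change from tf10_iris_onehotencoding.py is the label format: sparse_categorical_crossentropy takes integer labels directly, so the to_categorical step is dropped, while categorical_crossentropy needs one-hot vectors. Both compute the same loss, as this small check with made-up softmax outputs suggests:

import numpy as np
from keras.losses import categorical_crossentropy, sparse_categorical_crossentropy
from keras.utils import to_categorical

y_int = np.array([0, 2])                      # integer labels (sparse)
y_hot = to_categorical(y_int, num_classes=3)  # the same labels, one-hot
probs = np.array([[0.8, 0.1, 0.1],
                  [0.2, 0.2, 0.6]])           # made-up softmax outputs

print(sparse_categorical_crossentropy(y_int, probs).numpy())  # [0.223 0.511]
print(categorical_crossentropy(y_hot, probs).numpy())         # [0.223 0.511]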

 

tf11_wine_onehotencoding.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
import time

# 1. Data
datasets = load_wine()
print(datasets.DESCR)
print(datasets.feature_names)

x = datasets.data
y = datasets.target
print(x.shape, y.shape) # (178, 13) (178,)

### one_hot_encoding ###
from keras.utils import to_categorical
y = to_categorical(y)
print(y)
print(y.shape)          # (178, 3)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (124, 13) (124, 3)
print(x_test.shape, y_test.shape)   # (54, 13) (54, 3)

# 2. Model
model = Sequential()
model.add(Dense(100, input_dim=13))
model.add(Dense(64))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))

# 3. Compile and train
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=500, batch_size=100)
end_time = time.time() - start_time
print('time spent : ', end_time)

# 4. Evaluate and predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)

# time spent :  2.9441092014312744
# loss :  0.23444879055023193
# acc :  0.9259259104728699

 

tf11_wine_softmax.py

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
import time

# 1. Data
datasets = load_wine()
print(datasets.DESCR)       # check the classes: the number of classes is the number of output units
print(datasets.feature_names)

x = datasets.data
y = datasets.target
print(x.shape, y.shape) # (178, 13) (178,)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=100, shuffle=True
)
print(x_train.shape, y_train.shape) # (124, 13) (124,)
print(x_test.shape, y_test.shape)   # (54, 13) (54,)

# 2. Model
model = Sequential()
model.add(Dense(64, input_dim=13))
model.add(Dense(128))
model.add(Dense(256))
model.add(Dense(128))
model.add(Dense(64))
model.add(Dense(3, activation='softmax'))

# 3. Compile and train
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
start_time = time.time()
model.fit(x_train, y_train, epochs=200, batch_size=200)
end_time = time.time() - start_time
print('time spent : ', end_time)

# 4. Evaluate and predict
loss, acc = model.evaluate(x_test, y_test)
print('loss : ', loss)
print('acc : ', acc)

# time spent : 1.2617859840393066
# loss :  0.23711225390434265
# acc :  0.9259259104728699
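
accuracy_score is imported in these scripts but never actually called. To use it with a softmax model, first collapse each probability row back to a class index with argmax; a minimal sketch with hypothetical arrays standing in for the real predictions:

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array([0, 2, 1, 2])       # integer labels (the sparse case)
y_prob = np.array([[0.7, 0.2, 0.1],   # made-up softmax outputs
                   [0.1, 0.3, 0.6],
                   [0.2, 0.5, 0.3],
                   [0.3, 0.4, 0.3]])

y_pred = np.argmax(y_prob, axis=1)     # [0 2 1 1]
print(accuracy_score(y_true, y_pred))  # 0.75
# With one-hot labels (to_categorical), take argmax of y_test as well.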
