import pandas as pd
import numpy as np
import tensorflow as tf
# Placeholder; rebound to FEAT_CONT + FEAT_CAT later in the script, before
# get_input_fn is first called.
FEATURES = []

# Columns the script wraps in tf.feature_column.numeric_column.
FEAT_CONT = [
    "LotFrontage", "LotArea", "OverallQual", "OverallCond",
    "YearBuilt", "YearRemodAdd", "MasVnrArea", "TotalBsmtSF",
    "1stFlrSF", "2ndFlrSF", "LowQualFinSF", "GrLivArea",
    "BsmtFullBath", "BsmtHalfBath", "FullBath", "HalfBath",
    "TotRmsAbvGrd", "Fireplaces", "GarageYrBlt", "GarageCars",
    "GarageArea", "WoodDeckSF", "OpenPorchSF", "EnclosedPorch",
    "3SsnPorch", "ScreenPorch", "PoolArea", "MiscVal",
]

# Columns the script treats as categorical (vocabulary list + embedding).
FEAT_CAT = [
    "MSSubClass", "MSZoning", "Street", "Alley",
    "LotShape", "LandContour", "Utilities", "LotConfig",
    "LandSlope", "Neighborhood", "Condition1", "Condition2",
    "BldgType", "HouseStyle", "RoofStyle", "RoofMatl",
    "Exterior1st", "Exterior2nd", "MasVnrType", "ExterQual",
    "ExterCond", "Foundation", "Heating", "HeatingQC",
    "CentralAir", "Electrical", "KitchenQual", "Functional",
    "FireplaceQu", "GarageType", "GarageFinish", "GarageQual",
    "GarageCond", "PavedDrive", "PoolQC", "Fence",
    "MiscFeature", "SaleType", "SaleCondition",
]
def get_input_fn(data_set, num_epochs=None, shuffle=True):
    """Wrap a pandas data set in a TensorFlow Estimator input function.

    Args:
        data_set: Table supporting ``data_set[name].values``; it must hold
            every column named in the module-level ``FEATURES`` list plus a
            ``"SalePrice"`` column, which is used as the label.
        num_epochs: Forwarded unchanged to ``pandas_input_fn``.
        shuffle: Forwarded unchanged to ``pandas_input_fn``.

    Returns:
        Whatever ``tf.compat.v1.estimator.inputs.pandas_input_fn`` returns
        for the assembled features and labels.
    """
    # FEATURES is looked up when the function runs, so it reflects whatever
    # the module has bound to that name by the time of the call.
    # Raw ``.values`` are used so the rebuilt frame and the label series both
    # get a fresh default index instead of inheriting the one on data_set.
    feature_values = {}
    for column_name in FEATURES:
        feature_values[column_name] = data_set[column_name].values
    feature_frame = pd.DataFrame(feature_values)
    labels = pd.Series(data_set["SalePrice"].values)

    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=feature_frame,
        y=labels,
        num_epochs=num_epochs,
        shuffle=shuffle,
    )
# Load the rows to score from "test.csv" and clean them with the same steps
# the script applies to the training data.
prediction_set = pd.read_csv("test.csv")
# Remove columns that are entirely empty, then use 0 as the placeholder for
# every remaining missing value.
prediction_set.dropna(how="all", axis=1, inplace=True)
prediction_set.fillna(0, inplace=True)
prediction_set = prediction_set.drop(["Id"], axis=1)
# get_input_fn always reads a "SalePrice" label column, so provide a dummy one.
prediction_set["SalePrice"] = 0.0
for column in FEAT_CAT:
    # Turn the numeric 0 placeholder into the string "0" in categorical
    # columns. A single .loc assignment is used instead of chained indexing
    # (frame[col][mask] = ...), which may write to a temporary copy and leave
    # the frame unchanged.
    zero_mask = prediction_set[column] == 0
    # Skip columns with nothing to replace so an integer-coded column is not
    # needlessly converted to object dtype by a no-op string assignment.
    if zero_mask.any():
        prediction_set.loc[zero_mask, column] = "0"
# Load the labelled data from "train.csv" and clean it.
training_set = pd.read_csv("train.csv")
# Remove columns that are entirely empty, then use 0 as the placeholder for
# every remaining missing value.
training_set.dropna(how="all", axis=1, inplace=True)
training_set.fillna(0, inplace=True)
training_set = training_set.drop(["Id"], axis=1)
for column in FEAT_CAT:
    # Turn the numeric 0 placeholder into the string "0" in categorical
    # columns. A single .loc assignment is used instead of chained indexing
    # (frame[col][mask] = ...), which may write to a temporary copy and leave
    # the frame unchanged.
    zero_mask = training_set[column] == 0
    # Skip columns with nothing to replace so an integer-coded column is not
    # needlessly converted to object dtype by a no-op string assignment.
    if zero_mask.any():
        training_set.loc[zero_mask, column] = "0"
# Hold out everything from row position 1400 onward for evaluation and keep
# the first 1400 rows for training. The hold-out slice must be taken first,
# because the second line rebinds training_set.
test_set = training_set.iloc[1400:, :]
training_set = training_set.iloc[:1400, :]
# Every column handed to the input function: numeric first, then categorical.
FEATURES = FEAT_CONT + FEAT_CAT

# Numeric features are passed to the network as plain real-valued columns.
feature_cols = [tf.feature_column.numeric_column(k) for k in FEAT_CONT]

# Each categorical feature gets a vocabulary made of the distinct values seen
# in the training rows, wrapped in a 16-dimensional embedding.
feature_cat = []
for name in FEAT_CAT:
    # unique() keeps first-appearance order, so the vocabulary order is the
    # same on every run (iteration order of a set of strings is not).
    vocabulary = tuple(training_set[name].unique().tolist())
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(
        key=name, vocabulary_list=vocabulary)
    feature_cat.append(
        tf.feature_column.embedding_column(categorical, dimension=16))

numN = len(FEATURES)

# Pass BOTH column groups to the model. Previously only feature_cols was
# supplied, so the embedding columns built above were never used and all
# categorical features were ignored during training and prediction.
regressor = tf.estimator.DNNRegressor(
    feature_columns=feature_cols + feature_cat,
    hidden_units=[300, 150, 75, 30],
    model_dir=None)
# Train on the training rows; get_input_fn's defaults (num_epochs=None,
# shuffle=True) are used here, and training is capped at 5000 steps.
regressor.train(input_fn=get_input_fn(training_set), steps=5000)

# Score the held-out rows once, in their original order, and report the loss.
ev = regressor.evaluate(input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

# Collect the first value of each "predictions" entry for the held-out rows.
y = regressor.predict(input_fn=get_input_fn(test_set, num_epochs=1, shuffle=False))
pred = [item["predictions"][0] for item in y]

# Print row number, predicted price and actual price side by side.
test_result = test_set["SalePrice"].values.tolist()
for row, (predicted, actual) in enumerate(zip(pred, test_result)):
    print(row, predicted, actual)
# Predict a price for every row of the prediction set, once and in file order.
y = regressor.predict(input_fn=get_input_fn(prediction_set, num_epochs=1, shuffle=False))
pred = [item["predictions"][0] for item in y]

# The "Id" column was dropped from prediction_set before modelling, so read
# the real identifiers back from the source file instead of reconstructing
# them from a hard-coded offset (which also shadowed the builtin ``id``).
row_ids = pd.read_csv("test.csv", usecols=["Id"])["Id"].tolist()
if len(row_ids) != len(pred):
    raise ValueError(
        "expected one prediction per row of test.csv, got {} predictions "
        "for {} rows".format(len(pred), len(row_ids)))

# One "<Id>,<predicted price>" line per row; no header line is written.
with open("output.csv", "w") as f:
    f.writelines(
        "{},{}\n".format(row_id, value) for row_id, value in zip(row_ids, pred))