# Load the test and training sets, using the 'Id' column as the row index.
X_test = pd.read_csv('../input/test.csv', index_col='Id')
X = pd.read_csv('../input/train.csv', index_col='Id')
# Columns of X that contain at least one missing value; they are dropped
# in place so the remaining steps only see fully-populated columns.
cols_with_missing = [col for col in X.columns if X[col].isnull().any()]
X.drop(cols_with_missing, axis=1, inplace=True)

# Object-dtype columns that remain after the drop above.
object_cols = [col for col in X.columns if X[col].dtype == "object"]

# Subset of the object columns with fewer than 10 distinct values.
low_cardinality_cols = [col for col in object_cols if X[col].nunique() < 10]
# Columns of X_test that contain at least one missing value; dropped in place.
# NOTE(review): this rebinds `cols_with_missing` (previously computed from X)
# and is derived from X_test alone, so X and X_test may end up with different
# column sets -- confirm that downstream code aligns the two frames.
cols_with_missing = [col for col in X_test.columns if X_test[col].isnull().any()]
X_test.drop(cols_with_missing, axis=1, inplace=True)

# Object-dtype columns that remain in X_test after the drop above.
test_object_cols = [col for col in X_test.columns if X_test[col].dtype == "object"]

# Subset of those columns with fewer than 10 distinct values in X_test.
test_low_cardinality_cols = [
    col for col in test_object_cols if X_test[col].nunique() < 10
]
# "Cardinality" means the number of unique values in a column.
# Select categorical columns with relatively low cardinality (convenient but
# arbitrary): object-dtype columns with fewer than 10 distinct values.
# NOTE(review): X_train_full is not defined in this part of the file --
# presumably it is created earlier; confirm before running.
categorical_cols = [
    cname
    for cname in X_train_full.columns
    if X_train_full[cname].nunique() < 10
    and X_train_full[cname].dtype == "object"
]
# Select numerical columns: those whose dtype is int64 or float64.
# NOTE(review): X_train_full is not defined in this part of the file --
# presumably it is created earlier; confirm before running.
numerical_cols = [
    cname
    for cname in X_train_full.columns
    if X_train_full[cname].dtype in ['int64', 'float64']
]