A small, complete example of the issue
import os
import timeit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
from mpl_toolkits.mplot3d import Axes3D
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import (precision_score, recall_score, f1_score)
from sklearn.preprocessing import Imputer
# Load the dataset, expand the combined `flags` string into one 0/1 indicator
# column per flag character, and build the feature matrix X.
DATASET_PATH = '/Users/sudarshan/Desktop/masterproject/SampleDatasets/dataset.csv'

# Indicator columns that the feature matrix relies on.
FLAG_COLUMNS = ['A', 'F', 'P', 'R', 'S']
FEATURE_COLUMNS = ['packets', 'bytes', 'duration'] + FLAG_COLUMNS

start_time = timeit.default_timer()
np.set_printoptions(suppress=True)

df = pd.read_csv(DATASET_PATH)
print(df.isnull().any())
df.dropna(inplace=True)
print(df.isnull().any())

# str.get_dummies() only splits on `sep`, so an undelimited value such as
# 'FPA' would become a single 'FPA' column. Joining the characters with '|'
# first ('FPA' -> 'F|P|A') yields one indicator column per flag character.
# NOTE(review): assumes every flag is a single character -- confirm against
# the real data.
just_dummies = df['flags'].astype(str).map('|'.join)
values = just_dummies.str.get_dummies(sep='|')

# A flag that never occurs in the data produces no column; add it as all
# zeros so the float conversion and feature selection below cannot KeyError.
for flag in FLAG_COLUMNS:
    if flag not in values.columns:
        values[flag] = 0

# `values` shares df's index (both post-dropna), so the concat aligns row-wise.
dfMod = pd.concat([df, values], axis=1)
print(dfMod.head())
print(dfMod.columns.tolist())

dfMod[FLAG_COLUMNS] = dfMod[FLAG_COLUMNS].astype(float)
print(dfMod.columns.tolist())

# Select from dfMod (not df): the indicator columns only exist on dfMod.
X = dfMod[FEATURE_COLUMNS].values
Expected Output
If a row's `flags` value is `FPA`, it should be split into separate indicator columns, one per flag character: 'F', 'P', 'A'.
Output of pd.show_versions()
Comment From: jreback
This is not reproducible. Make a small, minimal example: construct a representative frame, then show the actual error.
Comment From: jreback
You are better off asking this on Stack Overflow, or please post a self-contained, reproducible example.