A small, complete example of the issue

import numpy as np import pandas as pd import timeit from sklearn import preprocessing import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from sklearn.ensemble import RandomForestClassifier from sklearn.cross_validation import train_test_split from sklearn.metrics import accuracy_score from sklearn.metrics import classification_report,roc_auc_score from sklearn.externals import joblib from sklearn.metrics import (precision_score, recall_score,f1_score) from sklearn.metrics import confusion_matrix import os import psutil from sklearn.preprocessing import Imputer

# Reproduction script: load the dataset, derive indicator columns from the
# `flags` column, and build the feature matrix X.
start_time = timeit.default_timer()
np.set_printoptions(suppress=True)

df = pd.read_csv('/Users/sudarshan/Desktop/masterproject/SampleDatasets/dataset.csv')

# Report which columns hold missing values before and after dropping those rows.
print(df.isnull().any())
df.dropna(inplace=True)
print(df.isnull().any())

df['flag'] = df['A'].str.extract('([A])', expand=True)

# str.get_dummies(sep='|') creates one indicator column per '|'-separated token.
# NOTE(review): a value that contains no '|' (e.g. 'FPA') is a single token, so
# it yields one 'FPA' column rather than separate 'F', 'P' and 'A' columns.
just_dummies = pd.Series(df['flags'])
values = just_dummies.str.get_dummies(sep='|')

# NOTE(review): this first dfMod is replaced by the assignment right below it
# and is never read in between.
s = df['flags'].apply(pd.Series)
dfMod = pd.concat([df, pd.DataFrame(s.values.tolist())], axis=1)

dfMod = pd.concat([df, values], axis=1)

# Use the function form of print, matching the other print calls in this
# script; the bare `print x` statement is a syntax error on Python 3.
print(dfMod.head())
print(dfMod.columns.tolist())

dfMod['A'] = dfMod['A'].astype(float)
dfMod['F'] = dfMod['F'].astype(float)
dfMod['P'] = dfMod['P'].astype(float)
dfMod['R'] = dfMod['R'].astype(float)
dfMod['S'] = dfMod['S'].astype(float)

print(dfMod.columns.tolist())

# Select from dfMod, not df: dfMod is the frame the indicator columns were
# concatenated onto and cast to float just above.
X = dfMod.as_matrix(columns=['packets', 'bytes', 'duration', 'A', 'F', 'P', 'R', 'S'])

Expected Output

If a flag has a combined value such as 'FPA', it should be split and processed as separate flags: 'F', 'P', and 'A'.

Output of pd.show_versions()

# Paste the output of pd.show_versions() here

Comment From: jreback

This is not reproducible. Make a small, minimal example, meaning you construct a representative frame and then show the actual error.

Comment From: jreback

You are better off asking this on Stack Overflow, or please post a self-contained, reproducible example.