Code Sample, a copy-pastable example if possible
print(TARGET_TABLE)
print(df_tag2vec.shape)
print(df_tag2vec.head())
# send to bq
df_tag2vec.to_gbq(TARGET_TABLE,
project_id,
private_key = private_key,
if_exists = 'append',
chunksize = 1000
)
Problem description
I am having trouble writing a pandas df to gbq. It gets so far and then i get a generic error such as "GenericGBQException: Reason: backendError, Message: Error encountered during execution. Retrying may solve the problem."
Expected Output
The streaming insert should get to 100%
Actual Output (so you can see a bit about the df i'm trying to send)
variety.tag2vec_model_v001
(39975, 205)
tag dim_0 dim_1 dim_2 dim_3 \
0 writers-guild-of-america-east 0.863149 -2.219846 1.494849 -1.521250
1 john-waters 2.815305 3.936131 -0.984150 1.720128
2 the-great-wall -1.456924 0.426809 -0.832744 -2.163773
3 matt-damon 0.055670 0.387900 -0.587764 -1.748286
4 constance-wu -1.437565 0.193700 -0.183830 3.129335
dim_4 dim_5 dim_6 dim_7 dim_8 ... dim_194 \
0 -2.521716 -2.150109 2.474365 0.418439 0.197788 ... -1.475637
1 1.923637 2.749673 -0.626592 2.727084 1.857024 ... 0.890091
2 1.520479 -0.089576 -0.450388 -0.056411 -1.534561 ... 0.760129
3 -1.150792 -0.570530 -0.513267 1.295110 0.096565 ... -0.044599
4 -0.822471 -1.351726 -0.655856 1.208656 -1.045858 ... 2.419254
dim_195 dim_196 dim_197 dim_198 dim_199 \
0 1.161305 -3.531474 1.789569 1.961487 3.104461
1 0.158052 -2.178421 1.177937 4.073684 -0.685760
2 -2.016968 1.066475 0.422333 2.549898 -1.837330
3 1.312604 -1.465636 0.238204 0.317638 -1.336374
4 -1.924557 0.518352 -3.803247 -0.242159 -2.717843
vec_array \
0 0.86314863,-2.21984625,1.49484944,-1.52124977,-2.52171636,-2.15010858,2.474365,0.41843852,0.19778761,0.93465149,1.53398824,-0.45039997,-0.97009069,0.26170924,0.66139561,2.44006419,-1.49820542,3.79944873,1.29380906,-1.63682914,0.03483771,1.8013476...
1 2.81530476,3.93613124,-0.98414975,1.72012818,1.92363739,2.74967265,-0.6265915,2.7270844,1.85702419,1.18509197,-1.14664268,4.3590126,1.98414719,-0.97125101,-1.44822264,-3.50116491,1.49479496,5.5663414,0.4945443,-2.02598834,-2.03271103,2.18173981,0...
2 -1.45692432e+00,4.26809013e-01,-8.32743526e-01,-2.16377330e+00,1.52047944e+00,-8.95758346e-02,-4.50387537e-01,-5.64112216e-02,-1.53456116e+00,-7.65420198e-01,7.50524223e-01,2.46556687e+00,1.41133189e+00,-3.62224830e-03,5.33745065e-02,5.65747023e-...
3 0.05566961,0.38790002,-0.58776414,-1.74828553,-1.15079153,-0.57053,-0.51326668,1.29510999,0.09656468,-0.2691057,-1.18279386,1.81077254,0.27502298,1.23938048,-0.38538951,0.03872446,1.02271771,0.63706911,0.51867175,-0.71383053,1.07655907,-0.0203045...
4 -1.43756473,0.19370019,-0.18383034,3.1293354,-0.8224709,-1.35172617,-0.65585625,1.20865571,-1.04585755,0.29337004,0.37566027,-2.94096756,-0.33877131,-1.17759991,-2.97414494,-1.1319052,-1.61069989,-0.3649314,0.42119837,0.78121763,-2.99270153,-1.67...
model_info load_job_info train_or_test
0 variety_tag2vec_model_v001 2017-10-12 12:15 train
1 variety_tag2vec_model_v001 2017-10-12 12:15 train
2 variety_tag2vec_model_v001 2017-10-12 12:15 train
3 variety_tag2vec_model_v001 2017-10-12 12:15 train
4 variety_tag2vec_model_v001 2017-10-12 12:15 train
[5 rows x 205 columns]
Streaming Insert is 2.5015634771732334% Complete
Streaming Insert is 5.003126954346467% Complete
Streaming Insert is 7.5046904315197% Complete
---------------------------------------------------------------------------
HttpError Traceback (most recent call last)
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in load_data(self, dataframe, dataset_id, table_id, chunksize)
536 tableId=table_id,
--> 537 body=body).execute()
538 except HttpError as ex:
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\oauth2client\_helpers.py in positional_wrapper(*args, **kwargs)
132 logger.warning(message)
--> 133 return wrapped(*args, **kwargs)
134 return positional_wrapper
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\googleapiclient\http.py in execute(self, http, num_retries)
839 if resp.status >= 300:
--> 840 raise HttpError(resp, content, uri=self.uri)
841 return self.postproc(resp, content)
HttpError: <HttpError 503 when requesting https://www.googleapis.com/bigquery/v2/projects/pmc-analytical-data-mart/datasets/variety/tables/tag2vec_model_v001/insertAll?alt=json returned "Error encountered during execution. Retrying may solve the problem.">
During handling of the above exception, another exception occurred:
GenericGBQException Traceback (most recent call last)
<ipython-input-37-4bb14368469e> in <module>()
10 private_key = private_key,
11 if_exists = 'append',
---> 12 chunksize = 1000
13 )
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas\core\frame.py in to_gbq(self, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
955 return gbq.to_gbq(self, destination_table, project_id=project_id,
956 chunksize=chunksize, verbose=verbose, reauth=reauth,
--> 957 if_exists=if_exists, private_key=private_key)
958
959 @classmethod
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas\io\gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
107 chunksize=chunksize,
108 verbose=verbose, reauth=reauth,
--> 109 if_exists=if_exists, private_key=private_key)
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
852 table.create(table_id, table_schema)
853
--> 854 connector.load_data(dataframe, dataset_id, table_id, chunksize)
855
856
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in load_data(self, dataframe, dataset_id, table_id, chunksize)
537 body=body).execute()
538 except HttpError as ex:
--> 539 self.process_http_error(ex)
540
541 # For streaming inserts, even if you receive a success HTTP
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in process_http_error(ex)
350
351 raise GenericGBQException(
--> 352 "Reason: {0}, Message: {1}".format(reason, message))
353
354 raise GenericGBQException(errors)
GenericGBQException: Reason: backendError, Message: Error encountered during execution. Retrying may solve the problem.
Output of pd.show_versions()
Comment From: andrewm4894
I've resorted to just sending smaller samples each time but it's still on and off in terms of failing on even the small samples quite often.
For example:
# get target table
TARGET_TABLE = MY_LOB + '.' + MY_MODEL_NAME
print(TARGET_TABLE)
# TODO - find more reliable way to load pd df into bq
for i in range(0,5):
# use repeated sampling to get all data in if keeps failing
df_tag2vec_sample = df_tag2vec.sample(frac=0.05, replace=False, random_state=np.random.choice(1000))
# send to bq
df_tag2vec_sample.to_gbq(TARGET_TABLE,
project_id,
private_key = private_key,
if_exists = 'append',
chunksize = 1000
)
# send to bq
#df_tag2vec.to_gbq(TARGET_TABLE,
# project_id,
# private_key = private_key,
# if_exists = 'append',
# chunksize = 1000
# )
Gives me:
variety.tag2vec_model_v001
Streaming Insert is 50.02501250625313% Complete
Streaming Insert is 100.0% Complete
Streaming Insert is 50.02501250625313% Complete
Streaming Insert is 100.0% Complete
---------------------------------------------------------------------------
HttpError Traceback (most recent call last)
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in load_data(self, dataframe, dataset_id, table_id, chunksize)
536 tableId=table_id,
--> 537 body=body).execute()
538 except HttpError as ex:
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\oauth2client\_helpers.py in positional_wrapper(*args, **kwargs)
132 logger.warning(message)
--> 133 return wrapped(*args, **kwargs)
134 return positional_wrapper
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\googleapiclient\http.py in execute(self, http, num_retries)
839 if resp.status >= 300:
--> 840 raise HttpError(resp, content, uri=self.uri)
841 return self.postproc(resp, content)
HttpError: <HttpError 503 when requesting https://www.googleapis.com/bigquery/v2/projects/pmc-analytical-data-mart/datasets/variety/tables/tag2vec_model_v001/insertAll?alt=json returned "Error encountered during execution. Retrying may solve the problem.">
During handling of the above exception, another exception occurred:
GenericGBQException Traceback (most recent call last)
<ipython-input-34-48e5a58267c2> in <module>()
12 private_key = private_key,
13 if_exists = 'append',
---> 14 chunksize = 1000
15 )
16
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas\core\frame.py in to_gbq(self, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
955 return gbq.to_gbq(self, destination_table, project_id=project_id,
956 chunksize=chunksize, verbose=verbose, reauth=reauth,
--> 957 if_exists=if_exists, private_key=private_key)
958
959 @classmethod
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas\io\gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
107 chunksize=chunksize,
108 verbose=verbose, reauth=reauth,
--> 109 if_exists=if_exists, private_key=private_key)
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in to_gbq(dataframe, destination_table, project_id, chunksize, verbose, reauth, if_exists, private_key)
852 table.create(table_id, table_schema)
853
--> 854 connector.load_data(dataframe, dataset_id, table_id, chunksize)
855
856
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in load_data(self, dataframe, dataset_id, table_id, chunksize)
537 body=body).execute()
538 except HttpError as ex:
--> 539 self.process_http_error(ex)
540
541 # For streaming inserts, even if you receive a success HTTP
c:\users\andrew\appdata\local\programs\python\python36\lib\site-packages\pandas_gbq\gbq.py in process_http_error(ex)
350
351 raise GenericGBQException(
--> 352 "Reason: {0}, Message: {1}".format(reason, message))
353
354 raise GenericGBQException(errors)
GenericGBQException: Reason: backendError, Message: Error encountered during execution. Retrying may solve the problem.
So it worked once or twice and then hit the generic error.
Not sure what might be going on as nothing too special about my df - a little wide but not massive or anything.
Any help or ideas much appreciated as love the gbq part of the library (have had no trouble using it to read - so long as not too massive a table), But would love to use it 100% for write also.
Comment From: jreback
support has long ago moved to: https://github.com/pydata/pandas-gbq
Comment From: jorisvandenbossche
@andrewm4894 in other words, please open an issue at the issue tracker of https://github.com/pydata/pandas-gbq