hi,

Code Sample,

        # Build one Elasticsearch _bulk POST per chunk of `step` DataFrame rows,
        # then hand the collected requests to grequests.
        step = 100
        requestlist = []
        headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}

        for i in range(0, len(df), step):

            # Independent copy of the current slice of rows.
            tmp_df = df.iloc[i:i + step].copy()

            final_json_string = []

            # NOTE(review): the result of apply() is discarded; presumably
            # row_to_json_str fills final_json_string as a side effect — confirm.
            tmp_df.apply(lambda row: row_to_json_str(row), axis=1)

            # Newline-delimited payload with a trailing newline.
            final_json_string = "\n".join(final_json_string)
            final_json_string += "\n"

            url = f'{ES_URL}/{table_name}/_bulk'
            # Fixed: `headers` and `data` are separate keyword arguments (the
            # original line was missing the comma between them).
            requestlist.append(
                grequests.post(url, headers=headers, data=final_json_string))
            # Release the chunk copy before the next iteration.
            del tmp_df
            gc.collect()

        # NOTE(review): `r` is never consumed in this snippet; verify that the
        # requests are actually sent (grequests.imap is documented as lazy).
        r = grequests.imap(requestlist, size=4, exception_handler=exception_handler)

# Drop the remaining references to the payload, the request list and the
# DataFrame, close the `cnx` handle, then force a garbage-collection pass.
del final_json_string
del requestlist
del df
cnx.close()
gc.collect()

Problem description

OS: Ubuntu 16.04.7 LTS; Python 3.8.1 (default, Jan 6 2020, 09:57:21) [GCC 5.4.0 20160609] on linux

After migrating pandas from 0.25.3 to 1.1.3, memory usage increases after each call to the method.

After reverting to 0.25.3, the script's memory usage is stable.

The DataFrame comes from `pandas.read_sql`. The goal of the script is to convert the DataFrame into Elasticsearch JSON requests.

The loop below isn't exactly the same as the sample above, but the memory profiler gives the same result: memory usage goes from 1669.2 MiB to 6218.0 MiB when the for loop is reached.

memory_usage

108 1669.2 MiB 0.0 MiB list_df = [df[i:i + n] for i in range(0, df.shape[0], n)]
109
110 1669.2 MiB 0.0 MiB requestlist = []
111
112 1669.2 MiB 0.0 MiB headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
113
116 6218.0 MiB 0.0 MiB for tmp_df in list_df:
117 6214.4 MiB 0.0 MiB final_json_string = []
118
119 6215.6 MiB 1.5 MiB tmp_df.apply(lambda row: row_to_json_str(row), axis=1)
120
121 6216.9 MiB 1.3 MiB final_json_string = "\n".join(final_json_string)
122 6218.0 MiB 1.3 MiB final_json_string += "\n"
123
124 6218.0 MiB 0.2 MiB url = f'{ES_URL}/{table_name}/_bulk'
125
127
128 6218.0 MiB 0.0 MiB requestlist.append(
129 6218.0 MiB 0.2 MiB grequests.post(url, headers=headers, data=final_json_string))
130
132 6218.0 MiB 0.0 MiB grequests.imap(requestlist, size=4, exception_handler=exception_handler)
133 6218.0 MiB 0.0 MiB del tmp_df
134 6218.0 MiB 0.0 MiB del requestlist
135 6218.0 MiB 0.0 MiB del list_df

137 6218.0 MiB 0.0 MiB del df

139 6218.0 MiB 0.0 MiB cnx.close()

thanks

Comment From: mroeschke

Thanks for the report, it appears these are run with very old versions of pandas so closing for now. Can reopen if these also appear with more recent versions of pandas