Code Sample

I am trying to build a DataFrame from a dict whose values are arrays of different lengths. However, `pd.DataFrame.from_dict` raises a ValueError.

In [1]: import numpy as np

In [2]: import pandas as pd

In [3]: x_dict = {"A{}".format(i): np.random.randint(1, 25, data) for i, data in enumerate([5, 10, 15, 20])}

In [4]: x_dict
Out[4]: 
{'A0': array([ 5,  7, 17, 10, 24]),
 'A1': array([ 4,  8, 11,  7,  9,  1, 23, 22, 20, 15]),
 'A2': array([ 9,  5, 20,  2, 21, 12, 21,  8,  1, 10,  5, 21,  7,  9,  5]),
 'A3': array([ 2,  1, 24, 21, 24, 16,  6,  9, 20,  7, 24, 11, 23, 15,  3, 22,  5,
        21,  1, 20])}

In [5]: x_df = pd.DataFrame.from_dict(x_dict)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-e23195a7d765> in <module>()
----> 1 x_df = pd.DataFrame.from_dict(x_dict)

/python2.7/site-packages/pandas/core/frame.pyc in from_dict(cls, data, orient, dtype)
    807             raise ValueError('only recognize index or columns for orient')
    808 
--> 809         return cls(data, index=index, columns=columns, dtype=dtype)
    810 
    811     @deprecate_kwarg(old_arg_name='outtype', new_arg_name='orient')

/python2.7/site-packages/pandas/core/frame.pyc in __init__(self, data, index, columns, dtype, copy)
    222                                  dtype=dtype, copy=copy)
    223         elif isinstance(data, dict):
--> 224             mgr = self._init_dict(data, index, columns, dtype=dtype)
    225         elif isinstance(data, ma.MaskedArray):
    226             import numpy.ma.mrecords as mrecords

/python2.7/site-packages/pandas/core/frame.pyc in _init_dict(self, data, index, columns, dtype)
    358             arrays = [data[k] for k in keys]
    359 
--> 360         return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
    361 
    362     def _init_ndarray(self, values, index, columns, dtype=None, copy=False):

/python2.7/site-packages/pandas/core/frame.pyc in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
   5229     # figure out the index, if necessary
   5230     if index is None:
-> 5231         index = extract_index(arrays)
   5232     else:
   5233         index = _ensure_index(index)

/python2.7/site-packages/pandas/core/frame.pyc in extract_index(data)
   5277             lengths = list(set(raw_lengths))
   5278             if len(lengths) > 1:
-> 5279                 raise ValueError('arrays must all be same length')
   5280 
   5281             if have_dicts:

ValueError: arrays must all be same length

Expected output

A DataFrame with the dict keys as columns and each array's values running down the rows, with shorter columns padded with NaN.

In [11]: x_df
Out[11]: 
      A1    A0    A3    A2
0    4.0   5.0   2.0   9.0
1    8.0   7.0   1.0   5.0
2   11.0  17.0  24.0  20.0
3    7.0  10.0  21.0   2.0
4    9.0  24.0  24.0  21.0
5    1.0   NaN  16.0  12.0
6   23.0   NaN   6.0  21.0
7   22.0   NaN   9.0   8.0
8   20.0   NaN  20.0   1.0
9   15.0   NaN   7.0  10.0
10   NaN   NaN  24.0   5.0
11   NaN   NaN  11.0  21.0
12   NaN   NaN  23.0   7.0
13   NaN   NaN  15.0   9.0
14   NaN   NaN   3.0   5.0
15   NaN   NaN  22.0   NaN
16   NaN   NaN   5.0   NaN
17   NaN   NaN  21.0   NaN
18   NaN   NaN   1.0   NaN
19   NaN   NaN  20.0   NaN

output of pd.show_versions()

INSTALLED VERSIONS - commit: None - python: 2.7.11.final.0 - python-bits: 64 - OS: Darwin - OS-release: 15.5.0 - machine: x86_64 - processor: i386 - byteorder: little - LC_ALL: None - LANG: en_US.UTF - pandas: 0.18.1 - nose: 1.3.7 - pip: 8.1.2 - setuptools: 23.1.0 - Cython: 0.24 - numpy: 1.11.1 - scipy: 0.17.1 - statsmodels: 0.6.1 - xarray: None - IPython: 4.2.0 - sphinx: 1.4.4 - patsy: None - dateutil: 2.5.3 - pytz: 2016.4 - blosc: None - bottleneck: None - tables: None - numexpr: None - matplotlib: 1.5.1 - openpyxl: None - xlrd: 1.0.0 - xlwt: None - xlsxwriter: None - lxml: None - bs4: 4.4.1 - html5lib: None - httplib2: None - apiclient: None - sqlalchemy: None - pymysql: None - psycopg2: None - jinja2: 2.8 - boto: None - pandas_datareader: None

Comment From: jreback

If you wrap each of these in a Series, it will work, e.g. `pd.DataFrame({k: pd.Series(v) for k, v in x_dict.items()})`. As written it doesn't, because it's impossible to align non-indexed ragged arrays, as the error message indicates.

Comment From: akshayparopkari

Thank you @jreback! 👍