Synapse Notebook MemoryError

Andy AndiLolo 0 Reputation points
2023-06-06T18:11:44.0966667+00:00
/home/trusted-service-user/cluster-env/env/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3169: DtypeWarning: Columns (4,14,15,17) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
/home/trusted-service-user/cluster-env/env/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3169: DtypeWarning: Columns (2,4,12,23,38) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
/tmp/ipykernel_7003/2831107642.py in <module>
     33 naofs['Patient_Full_Name'].replace(r'\s+|\\n', '', regex=True, inplace=True)
     34 
---> 35 coms = pd.read_csv('abfss://******@dlsparagonprod.dfs.core.windows.net/Lite_Integration_Production_Data/COMS_WINOMS_Production_Dash_Data.csv', dtype=object)
     36 coms['Patient_Full_Name'].replace(r'\s+|\\n', '', regex=True, inplace=True)
     37 

~/cluster-env/env/lib/python3.8/site-packages/pandas/io/parsers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
    608     kwds.update(kwds_defaults)
    609 
--> 610     return _read(filepath_or_buffer, kwds)
    611 
    612 

~/cluster-env/env/lib/python3.8/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    466 
    467     with parser:
--> 468         return parser.read(nrows)
    469 
    470 

~/cluster-env/env/lib/python3.8/site-packages/pandas/io/parsers.py in read(self, nrows)
   1067             new_rows = len(index)
   1068 
-> 1069         df = DataFrame(col_dict, columns=columns, index=index)
   1070 
   1071         self._currow += new_rows

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
    527 
    528         elif isinstance(data, dict):
--> 529             mgr = init_dict(data, index, columns, dtype=dtype)
    530         elif isinstance(data, ma.MaskedArray):
    531             import numpy.ma.mrecords as mrecords

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/construction.py in init_dict(data, index, columns, dtype)
    285             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
    286         ]
--> 287     return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
    288 
    289 

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype, verify_integrity)
     93     axes = [columns, index]
     94 
---> 95     return create_block_manager_from_arrays(arrays, arr_names, axes)
     96 
     97 

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/managers.py in create_block_manager_from_arrays(arrays, names, axes)
   1699     arrays = [x if not isinstance(x, ABCPandasArray) else x.to_numpy() for x in arrays]
   1700     try:
-> 1701         blocks = _form_blocks(arrays, names, axes)
   1702         mgr = BlockManager(blocks, axes)
   1703         mgr._consolidate_inplace()

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/managers.py in _form_blocks(arrays, names, axes)
   1788 
   1789     if len(items_dict["ObjectBlock"]) > 0:
-> 1790         object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
   1791         blocks.extend(object_blocks)
   1792 

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/managers.py in _simple_blockify(tuples, dtype)
   1832     not None, coerce to this dtype
   1833     """
-> 1834     values, placement = _stack_arrays(tuples, dtype)
   1835 
   1836     # TODO: CHECK DTYPE?

~/cluster-env/env/lib/python3.8/site-packages/pandas/core/internals/managers.py in _stack_arrays(tuples, dtype)
   1878     shape = (len(arrays),) + _shape_compat(first)
   1879 
-> 1880     stacked = np.empty(shape, dtype=dtype)
   1881     for i, arr in enumerate(arrays):
   1882         stacked[i] = _asarray_compat(arr)

MemoryError: Unable to allocate 198. MiB for an array with shape (48, 539689) and data type object
Azure Synapse Analytics
Azure Synapse Analytics
An Azure analytics service that brings together data integration, enterprise data warehousing, and big data analytics. Previously known as Azure SQL Data Warehouse.
5,373 questions
0 comments No comments
{count} votes

1 answer

Sort by: Most helpful
  1. Vinodh247 34,661 Reputation points MVP Volunteer Moderator
    2023-06-07T13:41:02.78+00:00

    Hi,

    Thanks for reaching out to Microsoft Q&A.

    Can you make sure you are referencing the correct Python? See below a solved answer to a question similar to yours.

    https://learn.microsoft.com/en-us/answers/questions/625734/memoryerror-unable-to-allocate-84-4-mib-for-an-arr

    Please check and let me know if this worked.

    Please upvote and accept this as the answer if the reply was helpful; this will benefit other community members who run into the same issue.


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.