15 KiB
Pandas Errors and Warnings
The pandas.errors module provides comprehensive exception classes and warnings for error handling in pandas operations. These help developers identify and handle specific issues that can arise during data manipulation, file I/O, and analysis tasks.
Core Imports
import pandas as pd
from pandas import errors
from pandas.errors import (
# Data Type Errors
IntCastingNaNError,
DtypeWarning,
# Parsing and I/O Errors
ParserError,
ParserWarning,
EmptyDataError,
# Index and Data Structure Errors
UnsortedIndexError,
InvalidIndexError,
IndexingError,
DuplicateLabelError,
# Performance and Operation Warnings
PerformanceWarning,
SettingWithCopyWarning,
SettingWithCopyError,
ChainedAssignmentError,
# Computation and Analysis Errors
DataError,
SpecificationError,
MergeError,
# Frequency and Time Series Errors
NullFrequencyError,
OutOfBoundsDatetime,
OutOfBoundsTimedelta,
# Engine and Backend Errors
NumbaUtilError,
NumExprClobberingError,
UndefinedVariableError,
UnsupportedFunctionCall,
# File Format Specific Errors
DatabaseError,
PossibleDataLossError,
ClosedFileError,
PyperclipException,
PyperclipWindowsException,
# Development and Internal Errors
AbstractMethodError,
InvalidComparison,
LossySetitemError,
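NoBufferPresent,
# File Format and Styling Warnings
IncompatibilityWarning,
AttributeConflictWarning,
PossiblePrecisionLoss,
ValueLabelTypeMismatch,
InvalidColumnName,
CategoricalConversionWarning,
CSSWarning,
# Configuration Errors
OptionError,
InvalidVersion,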
)
Data Type and Conversion Errors
Type Casting Errors
IntCastingNaNError { .api }
class IntCastingNaNError(ValueError)
Exception raised when converting an array that contains NaN (or other non-finite values such as inf) to an integer dtype, for example via astype() or the dtype argument of a constructor.
# Example that raises IntCastingNaNError
import numpy as np
df = pd.DataFrame(np.array([[1, np.nan], [2, 3]]), dtype="i8")
# IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
DtypeWarning { .api }
class DtypeWarning(Warning)
Warning issued when read_csv or read_table encounters mixed data types in a column, typically because a large file is parsed in chunks and the dtype inferred for the column differs between chunks.
# Mixed types in column trigger DtypeWarning
df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + ['1'] * 100000)})
df.to_csv('mixed_types.csv', index=False)
df2 = pd.read_csv('mixed_types.csv') # DtypeWarning: Columns (0) have mixed types
Parsing and I/O Errors
File Reading Errors
ParserError { .api }
class ParserError(ValueError)
Generic exception for parsing errors in file reading functions like read_csv and read_html.
# Malformed CSV data
data = '''a,b,c
cat,foo,bar
dog,foo,"baz'''
from io import StringIO
pd.read_csv(StringIO(data), skipfooter=1, engine='python')
# ParserError: ',' expected after '"'
ParserWarning { .api }
class ParserWarning(Warning)
Warning issued when pandas falls back from the 'c' parser to the 'python' parser because the requested options are not supported by the 'c' parser.
# Using regex separator triggers ParserWarning
csv_data = '''a;b;c
1;1,8
1;2,1'''
df = pd.read_csv(StringIO(csv_data), sep='[;,]') # ParserWarning: Falling back to 'python' engine
EmptyDataError { .api }
class EmptyDataError(ValueError)
Exception raised when read_csv encounters empty data or headers.
from io import StringIO
empty = StringIO()
pd.read_csv(empty) # EmptyDataError: No columns to parse from file
Index and Data Structure Errors
Index Management Errors
UnsortedIndexError { .api }
class UnsortedIndexError(KeyError)
Error when slicing a MultiIndex that hasn't been lexicographically sorted.
# MultiIndex slicing without sorting
df = pd.DataFrame({"cat": [0, 0, 1, 1], "color": ["white", "white", "brown", "black"]})
df = df.set_index(["cat", "color"])
df.loc[(0, "black"):(1, "white")] # UnsortedIndexError: Key length was greater than lexsort depth
InvalidIndexError { .api }
class InvalidIndexError(Exception)
Exception for invalid index key usage, particularly with MultiIndex operations.
# Invalid MultiIndex access
idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
df = pd.DataFrame([[1, 1, 2, 2], [3, 3, 4, 4]], columns=idx)
df[:, 0] # InvalidIndexError: (slice(None, None, None), 0)
IndexingError { .api }
class IndexingError(Exception)
Exception for dimension mismatches and invalid indexing operations.
df = pd.DataFrame({'A': [1, 1, 1]})
df.loc[..., ..., 'A'] # IndexingError: indexer may only contain one '...' entry
DuplicateLabelError { .api }
class DuplicateLabelError(ValueError)
Error when operations would introduce duplicate labels on objects with allows_duplicate_labels=False.
s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(allows_duplicate_labels=False)
s.reindex(['a', 'a', 'b']) # DuplicateLabelError: Index has duplicates
Performance and Copy Warnings
Assignment and Copy Behavior
PerformanceWarning { .api }
class PerformanceWarning(Warning)
Warning for operations that may impact performance, such as indexing past lexsort depth.
# MultiIndex performance warning
df = pd.DataFrame({"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"]})
df = df.set_index(["jim", "joe"])
df.loc[(1, 'z')] # PerformanceWarning: indexing past lexsort depth may impact performance
SettingWithCopyWarning { .api }
class SettingWithCopyWarning(Warning)
Warning when setting values on a copied slice from a DataFrame (chained assignment).
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
df.loc[0:3]['A'] = 'a' # SettingWithCopyWarning: A value is trying to be set on a copy
SettingWithCopyError { .api }
class SettingWithCopyError(ValueError)
Exception version of SettingWithCopyWarning when mode.chained_assignment is set to 'raise'.
pd.options.mode.chained_assignment = 'raise'
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
df.loc[0:3]['A'] = 'a' # SettingWithCopyError: A value is trying to be set on a copy
ChainedAssignmentError { .api }
class ChainedAssignmentError(Warning)
Warning for chained assignment when Copy-on-Write mode is enabled, indicating the assignment won't update the original object.
pd.options.mode.copy_on_write = True
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
df["A"][0:3] = 10 # ChainedAssignmentError: chained assignment never works with Copy-on-Write
Computation and Analysis Errors
Data Operation Errors
DataError { .api }
class DataError(Exception)
Exception for operations on non-numerical data where numerical data is required.
ser = pd.Series(['a', 'b', 'c'])
ser.rolling(2).sum() # DataError: No numeric types to aggregate
SpecificationError { .api }
class SpecificationError(Exception)
Exception raised by agg() when aggregation functions are incorrectly specified.
df = pd.DataFrame({'A': [1, 1, 1, 2, 2], 'B': range(5)})
df.groupby('A').B.agg({'foo': 'count'}) # SpecificationError: nested renamer is not supported
MergeError { .api }
class MergeError(ValueError)
Exception during DataFrame merge operations, particularly validation failures.
left = pd.DataFrame({"a": ["a", "b", "b", "d"], "b": ["cat", "dog", "weasel", "horse"]})
right = pd.DataFrame({"a": ["a", "b", "c", "d"], "c": ["meow", "bark", "chirp", "nay"]}).set_index("a")
left.join(right, on="a", validate="one_to_one") # MergeError: Merge keys are not unique in left dataset
Frequency and Time Series Errors
Temporal Data Errors
NullFrequencyError { .api }
class NullFrequencyError(ValueError)
Exception raised when a time series operation such as shift() requires a frequency but the index has none (freq=None).
df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
df.shift(2) # NullFrequencyError: Cannot shift with no freq
OutOfBoundsDatetime { .api }
class OutOfBoundsDatetime(ValueError)
Exception for datetime values outside pandas' supported range (imported from pandas._libs.tslibs).
OutOfBoundsTimedelta { .api }
class OutOfBoundsTimedelta(ValueError)
Exception for timedelta values outside pandas' supported range (imported from pandas._libs.tslibs).
Engine and Backend Errors
Computational Engine Errors
NumbaUtilError { .api }
class NumbaUtilError(Exception)
Error for unsupported Numba engine routines in pandas operations.
df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]})
def incorrect_function(x):
    return sum(x) * 2.7
df.groupby("key").agg(incorrect_function, engine="numba") # NumbaUtilError: first 2 arguments must be ['values', 'index']
NumExprClobberingError { .api }
class NumExprClobberingError(NameError)
Exception when using built-in numexpr names as variable names in eval() or query().
df = pd.DataFrame({'abs': [1, 1, 1]})
df.query("abs > 2") # NumExprClobberingError: Variables overlap with builtins
UndefinedVariableError { .api }
class UndefinedVariableError(NameError)
Exception for undefined variable names in query() or eval() expressions.
df = pd.DataFrame({'A': [1, 1, 1]})
df.query("A > x") # UndefinedVariableError: name 'x' is not defined
UnsupportedFunctionCall { .api }
class UnsupportedFunctionCall(ValueError)
Exception for calling unsupported NumPy functions on pandas objects.
df = pd.DataFrame({"A": [0, 0, 1, 1], "B": ["x", "x", "z", "y"]})
import numpy as np
np.cumsum(df.groupby(["A"])) # UnsupportedFunctionCall: numpy operations not valid with groupby
File Format Specific Errors
Database and Storage Errors
DatabaseError { .api }
class DatabaseError(OSError)
Error raised when executing SQL that has bad syntax or that otherwise causes the database to report an error.
from sqlite3 import connect
conn = connect(':memory:')
pd.read_sql('select * test', conn) # DatabaseError: Execution failed on sql
PossibleDataLossError { .api }
class PossibleDataLossError(Exception)
Exception when trying to open an HDFStore file that's already opened with a different mode.
ClosedFileError { .api }
class ClosedFileError(Exception)
Exception when performing operations on a closed HDFStore file.
Clipboard and System Integration
PyperclipException { .api }
class PyperclipException(RuntimeError)
Exception for unsupported clipboard functionality in to_clipboard() and read_clipboard().
PyperclipWindowsException { .api }
class PyperclipWindowsException(PyperclipException)
Windows-specific exception when clipboard access is denied due to other processes.
File Format Warnings
HDF5 and Storage Warnings
IncompatibilityWarning { .api }
class IncompatibilityWarning(Warning)
Warning issued when trying to use where criteria on an incompatible HDF5 file.
AttributeConflictWarning { .api }
class AttributeConflictWarning(Warning)
Warning when index attributes conflict during HDFStore operations.
Stata File Warnings
PossiblePrecisionLoss { .api }
class PossiblePrecisionLoss(Warning)
Warning when to_stata() converts int64 values to float64 due to range limitations.
ValueLabelTypeMismatch { .api }
class ValueLabelTypeMismatch(Warning)
Warning when Stata export encounters non-string category values.
InvalidColumnName { .api }
class InvalidColumnName(Warning)
Warning issued by to_stata() when a column name is not a valid Stata variable name and therefore has to be converted.
CategoricalConversionWarning { .api }
class CategoricalConversionWarning(Warning)
Warning when reading partially labeled Stata files with iterators.
Style and Formatting Warnings
CSSWarning { .api }
class CSSWarning(UserWarning)
Warning when CSS styling conversion fails or encounters unhandled formats.
df = pd.DataFrame({'A': [1, 1, 1]})
df.style.applymap(lambda x: 'background-color: blueGreenRed;').to_excel('styled.xlsx')
# CSSWarning: Unhandled color format: 'blueGreenRed'
Development and Internal Errors
Abstract Method and Development Errors
AbstractMethodError { .api }
class AbstractMethodError(NotImplementedError)
def __init__(self, class_instance, methodtype: str = "method") -> None
Error for abstract methods that must be implemented in concrete classes. Supports different method types: 'method', 'classmethod', 'staticmethod', 'property'.
class Foo:
    @classmethod
    def classmethod(cls):
        raise pd.errors.AbstractMethodError(cls, methodtype="classmethod")

    def method(self):
        raise pd.errors.AbstractMethodError(self)
Foo.classmethod() # AbstractMethodError: This classmethod must be defined in the concrete class Foo
Internal Implementation Errors
InvalidComparison { .api }
class InvalidComparison(Exception)
Internal exception for invalid comparison operations (internal use only).
LossySetitemError { .api }
class LossySetitemError(Exception)
Internal exception for non-lossless __setitem__ operations on numpy arrays (internal use only).
NoBufferPresent { .api }
class NoBufferPresent(Exception)
Internal exception signaling absence of requested buffer in _get_data_buffer (internal use only).
Configuration Errors
Options and Configuration
OptionError { .api }
class OptionError(AttributeError, KeyError)
Exception for pandas configuration option errors (imported from pandas._config.config). It also subclasses KeyError, so existing `except KeyError` checks keep working.
InvalidVersion { .api }
class InvalidVersion(ValueError)
Exception for invalid version strings (imported from pandas.util.version).
Type Definitions
# Error Categories
from typing import Union

DataTypeError = Union[IntCastingNaNError, DtypeWarning]
ParsingError = Union[ParserError, ParserWarning, EmptyDataError]
# Named IndexStructureError rather than IndexError so the alias does not shadow Python's built-in IndexError.
IndexStructureError = Union[UnsortedIndexError, InvalidIndexError, IndexingError, DuplicateLabelError]
CopyWarning = Union[PerformanceWarning, SettingWithCopyWarning, SettingWithCopyError, ChainedAssignmentError]
ComputationError = Union[DataError, SpecificationError, MergeError]
TimeSeriesError = Union[NullFrequencyError, OutOfBoundsDatetime, OutOfBoundsTimedelta]
EngineError = Union[NumbaUtilError, NumExprClobberingError, UndefinedVariableError, UnsupportedFunctionCall]
FileFormatError = Union[DatabaseError, PossibleDataLossError, ClosedFileError, PyperclipException]
FormatWarning = Union[IncompatibilityWarning, AttributeConflictWarning, PossiblePrecisionLoss, ValueLabelTypeMismatch, InvalidColumnName, CategoricalConversionWarning, CSSWarning]
DevelopmentError = Union[AbstractMethodError, InvalidComparison, LossySetitemError, NoBufferPresent]
ConfigurationError = Union[OptionError, InvalidVersion]
# All pandas errors and warnings
PandasError = Union[
DataTypeError, ParsingError, IndexStructureError, CopyWarning, ComputationError,
TimeSeriesError, EngineError, FileFormatError, FormatWarning,
DevelopmentError, ConfigurationError
]
The pandas.errors module provides comprehensive error handling for all aspects of pandas operations, from basic data type conversions to complex multi-index operations and file I/O. Understanding these error types helps in writing robust pandas applications with proper exception handling and performance optimization.