mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
540 lines
No EOL
15 KiB
Markdown
540 lines
No EOL
15 KiB
Markdown
# Pandas Errors and Warnings
|
|
|
|
The `pandas.errors` module provides comprehensive exception classes and warnings for error handling in pandas operations. These help developers identify and handle specific issues that can arise during data manipulation, file I/O, and analysis tasks.
|
|
|
|
## Core Imports
|
|
|
|
```python
|
|
import pandas as pd
|
|
from pandas import errors
|
|
from pandas.errors import (
|
|
# Data Type Errors
|
|
IntCastingNaNError,
|
|
DtypeWarning,
|
|
|
|
# Parsing and I/O Errors
|
|
ParserError,
|
|
ParserWarning,
|
|
EmptyDataError,
|
|
|
|
# Index and Data Structure Errors
|
|
UnsortedIndexError,
|
|
InvalidIndexError,
|
|
IndexingError,
|
|
DuplicateLabelError,
|
|
|
|
# Performance and Operation Warnings
|
|
PerformanceWarning,
|
|
SettingWithCopyWarning,
|
|
SettingWithCopyError,
|
|
ChainedAssignmentError,
|
|
|
|
# Computation and Analysis Errors
|
|
DataError,
|
|
SpecificationError,
|
|
MergeError,
|
|
|
|
# Frequency and Time Series Errors
|
|
NullFrequencyError,
|
|
OutOfBoundsDatetime,
|
|
OutOfBoundsTimedelta,
|
|
|
|
# Engine and Backend Errors
|
|
NumbaUtilError,
|
|
NumExprClobberingError,
|
|
UndefinedVariableError,
|
|
UnsupportedFunctionCall,
|
|
|
|
# File Format Specific Errors
|
|
DatabaseError,
|
|
PossibleDataLossError,
|
|
ClosedFileError,
|
|
PyperclipException,
|
|
PyperclipWindowsException,
|
|
|
|
# Development and Internal Errors
|
|
AbstractMethodError,
|
|
InvalidComparison,
|
|
LossySetitemError,
|
|
NoBufferPresent,
|
|
)
|
|
```
|
|
|
|
## Data Type and Conversion Errors
|
|
|
|
### Type Casting Errors
|
|
|
|
**IntCastingNaNError** { .api }
|
|
```python
|
|
class IntCastingNaNError(ValueError)
|
|
```
|
|
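Exception raised when converting an array that contains NaN (or other non-finite values such as inf) to an integer dtype, whether through `astype()` or through the `dtype=` argument of a constructor, as in the example below.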
|
|
|
|
```python
|
|
# Example that raises IntCastingNaNError
|
|
import numpy as np
|
|
df = pd.DataFrame(np.array([[1, np.nan], [2, 3]]), dtype="i8")
|
|
# IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
|
|
```
|
|
|
|
**DtypeWarning** { .api }
|
|
```python
|
|
class DtypeWarning(Warning)
|
|
```
|
|
Warning issued when `read_csv` or `read_table` encounter mixed data types in columns, typically when processing large files in chunks.
|
|
|
|
```python
|
|
# Mixed types in column trigger DtypeWarning
|
|
df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + ['1'] * 100000)})
|
|
df.to_csv('mixed_types.csv', index=False)
|
|
df2 = pd.read_csv('mixed_types.csv') # DtypeWarning: Columns (0) have mixed types
|
|
```
|
|
|
|
## Parsing and I/O Errors
|
|
|
|
### File Reading Errors
|
|
|
|
**ParserError** { .api }
|
|
```python
|
|
class ParserError(ValueError)
|
|
```
|
|
Generic exception for parsing errors in file reading functions like `read_csv` and `read_html`.
|
|
|
|
```python
|
|
# Malformed CSV data
|
|
data = '''a,b,c
|
|
cat,foo,bar
|
|
dog,foo,"baz'''
|
|
from io import StringIO
|
|
pd.read_csv(StringIO(data), skipfooter=1, engine='python')
|
|
# ParserError: ',' expected after '"'
|
|
```
|
|
|
|
**ParserWarning** { .api }
|
|
```python
|
|
class ParserWarning(Warning)
|
|
```
|
|
Warning raised when pandas falls back from the 'c' parser to the 'python' parser because the requested options are not supported by the C engine.
|
|
|
|
```python
|
|
# Using regex separator triggers ParserWarning
|
|
csv_data = '''a;b;c
|
|
1;1,8
|
|
1;2,1'''
|
|
df = pd.read_csv(StringIO(csv_data), sep='[;,]') # ParserWarning: Falling back to 'python' engine
|
|
```
|
|
|
|
**EmptyDataError** { .api }
|
|
```python
|
|
class EmptyDataError(ValueError)
|
|
```
|
|
Exception raised when `read_csv` encounters empty data or headers.
|
|
|
|
```python
|
|
from io import StringIO
|
|
empty = StringIO()
|
|
pd.read_csv(empty) # EmptyDataError: No columns to parse from file
|
|
```
|
|
|
|
## Index and Data Structure Errors
|
|
|
|
### Index Management Errors
|
|
|
|
**UnsortedIndexError** { .api }
|
|
```python
|
|
class UnsortedIndexError(KeyError)
|
|
```
|
|
Error when slicing a MultiIndex that hasn't been lexicographically sorted.
|
|
|
|
```python
|
|
# MultiIndex slicing without sorting
|
|
df = pd.DataFrame({"cat": [0, 0, 1, 1], "color": ["white", "white", "brown", "black"]})
|
|
df = df.set_index(["cat", "color"])
|
|
df.loc[(0, "black"):(1, "white")] # UnsortedIndexError: Key length was greater than lexsort depth
|
|
```
|
|
|
|
**InvalidIndexError** { .api }
|
|
```python
|
|
class InvalidIndexError(Exception)
|
|
```
|
|
Exception for invalid index key usage, particularly with MultiIndex operations.
|
|
|
|
```python
|
|
# Invalid MultiIndex access
|
|
idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]])
|
|
df = pd.DataFrame([[1, 1, 2, 2], [3, 3, 4, 4]], columns=idx)
|
|
df[:, 0] # InvalidIndexError: (slice(None, None, None), 0)
|
|
```
|
|
|
|
**IndexingError** { .api }
|
|
```python
|
|
class IndexingError(Exception)
|
|
```
|
|
Exception for dimension mismatches and invalid indexing operations.
|
|
|
|
```python
|
|
df = pd.DataFrame({'A': [1, 1, 1]})
|
|
df.loc[..., ..., 'A'] # IndexingError: indexer may only contain one '...' entry
|
|
```
|
|
|
|
**DuplicateLabelError** { .api }
|
|
```python
|
|
class DuplicateLabelError(ValueError)
|
|
```
|
|
Error when operations would introduce duplicate labels on objects with `allows_duplicate_labels=False`.
|
|
|
|
```python
|
|
s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(allows_duplicate_labels=False)
|
|
s.reindex(['a', 'a', 'b']) # DuplicateLabelError: Index has duplicates
|
|
```
|
|
|
|
## Performance and Copy Warnings
|
|
|
|
### Assignment and Copy Behavior
|
|
|
|
**PerformanceWarning** { .api }
|
|
```python
|
|
class PerformanceWarning(Warning)
|
|
```
|
|
Warning for operations that may impact performance, such as indexing past lexsort depth.
|
|
|
|
```python
|
|
# MultiIndex performance warning
|
|
df = pd.DataFrame({"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"]})
|
|
df = df.set_index(["jim", "joe"])
|
|
df.loc[(1, 'z')] # PerformanceWarning: indexing past lexsort depth may impact performance
|
|
```
|
|
|
|
**SettingWithCopyWarning** { .api }
|
|
```python
|
|
class SettingWithCopyWarning(Warning)
|
|
```
|
|
Warning when setting values on a copied slice from a DataFrame (chained assignment).
|
|
|
|
```python
|
|
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
|
|
df.loc[0:3]['A'] = 'a' # SettingWithCopyWarning: A value is trying to be set on a copy
|
|
```
|
|
|
|
**SettingWithCopyError** { .api }
|
|
```python
|
|
class SettingWithCopyError(ValueError)
|
|
```
|
|
Exception version of SettingWithCopyWarning when `mode.chained_assignment` is set to 'raise'.
|
|
|
|
```python
|
|
pd.options.mode.chained_assignment = 'raise'
|
|
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
|
|
df.loc[0:3]['A'] = 'a' # SettingWithCopyError: A value is trying to be set on a copy
|
|
```
|
|
|
|
**ChainedAssignmentError** { .api }
|
|
```python
|
|
class ChainedAssignmentError(Warning)
|
|
```
|
|
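Warning raised for chained assignment when Copy-on-Write mode is enabled, indicating that the assignment will not update the original object. Despite the `Error` suffix in its name, this class is a `Warning` subclass, so it is controlled with warning filters rather than caught with `try`/`except` by default.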
|
|
|
|
```python
|
|
pd.options.mode.copy_on_write = True
|
|
df = pd.DataFrame({'A': [1, 1, 1, 2, 2]})
|
|
df["A"][0:3] = 10 # ChainedAssignmentError: chained assignment never works with Copy-on-Write
|
|
```
|
|
|
|
## Computation and Analysis Errors
|
|
|
|
### Data Operation Errors
|
|
|
|
**DataError** { .api }
|
|
```python
|
|
class DataError(Exception)
|
|
```
|
|
Exception for operations on non-numerical data where numerical data is required.
|
|
|
|
```python
|
|
ser = pd.Series(['a', 'b', 'c'])
|
|
ser.rolling(2).sum() # DataError: No numeric types to aggregate
|
|
```
|
|
|
|
**SpecificationError** { .api }
|
|
```python
|
|
class SpecificationError(Exception)
|
|
```
|
|
Exception raised by `agg()` when aggregation functions are incorrectly specified.
|
|
|
|
```python
|
|
df = pd.DataFrame({'A': [1, 1, 1, 2, 2], 'B': range(5)})
|
|
df.groupby('A').B.agg({'foo': 'count'}) # SpecificationError: nested renamer is not supported
|
|
```
|
|
|
|
**MergeError** { .api }
|
|
```python
|
|
class MergeError(ValueError)
|
|
```
|
|
Exception during DataFrame merge operations, particularly validation failures.
|
|
|
|
```python
|
|
left = pd.DataFrame({"a": ["a", "b", "b", "d"], "b": ["cat", "dog", "weasel", "horse"]})
|
|
right = pd.DataFrame({"a": ["a", "b", "c", "d"], "c": ["meow", "bark", "chirp", "nay"]}).set_index("a")
|
|
left.join(right, on="a", validate="one_to_one") # MergeError: Merge keys are not unique in left dataset
|
|
```
|
|
|
|
## Frequency and Time Series Errors
|
|
|
|
### Temporal Data Errors
|
|
|
|
**NullFrequencyError** { .api }
|
|
```python
|
|
class NullFrequencyError(ValueError)
|
|
```
|
|
Exception raised when a time series operation such as `shift()` requires a frequency but the index's `freq` is `None`.
|
|
|
|
```python
|
|
df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None)
|
|
df.shift(2) # NullFrequencyError: Cannot shift with no freq
|
|
```
|
|
|
|
**OutOfBoundsDatetime** { .api }
|
|
```python
|
|
class OutOfBoundsDatetime(ValueError)
|
|
```
|
|
Exception for datetime values outside pandas' supported range (imported from pandas._libs.tslibs).
|
|
|
|
**OutOfBoundsTimedelta** { .api }
|
|
```python
|
|
class OutOfBoundsTimedelta(ValueError)
|
|
```
|
|
Exception for timedelta values outside pandas' supported range (imported from pandas._libs.tslibs).
|
|
|
|
## Engine and Backend Errors
|
|
|
|
### Computational Engine Errors
|
|
|
|
**NumbaUtilError** { .api }
|
|
```python
|
|
class NumbaUtilError(Exception)
|
|
```
|
|
Error for unsupported Numba engine routines in pandas operations.
|
|
|
|
```python
|
|
df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]})
|
|
def incorrect_function(x):
|
|
return sum(x) * 2.7
|
|
df.groupby("key").agg(incorrect_function, engine="numba") # NumbaUtilError: first 2 arguments must be ['values', 'index']
|
|
```
|
|
|
|
**NumExprClobberingError** { .api }
|
|
```python
|
|
class NumExprClobberingError(NameError)
|
|
```
|
|
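Exception raised when a built-in numexpr name is used as a variable name in `eval()` or `query()`. It is raised only when the engine is 'numexpr', which is the default engine when the numexpr package is installed.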
|
|
|
|
```python
|
|
df = pd.DataFrame({'abs': [1, 1, 1]})
|
|
df.query("abs > 2") # NumExprClobberingError: Variables overlap with builtins
|
|
```
|
|
|
|
**UndefinedVariableError** { .api }
|
|
```python
|
|
class UndefinedVariableError(NameError)
|
|
```
|
|
Exception for undefined variable names in `query()` or `eval()` expressions.
|
|
|
|
```python
|
|
df = pd.DataFrame({'A': [1, 1, 1]})
|
|
df.query("A > x") # UndefinedVariableError: name 'x' is not defined
|
|
```
|
|
|
|
**UnsupportedFunctionCall** { .api }
|
|
```python
|
|
class UnsupportedFunctionCall(ValueError)
|
|
```
|
|
Exception for calling unsupported NumPy functions on pandas objects.
|
|
|
|
```python
|
|
df = pd.DataFrame({"A": [0, 0, 1, 1], "B": ["x", "x", "z", "y"]})
|
|
import numpy as np
|
|
np.cumsum(df.groupby(["A"])) # UnsupportedFunctionCall: numpy operations not valid with groupby
|
|
```
|
|
|
|
## File Format Specific Errors
|
|
|
|
### Database and Storage Errors
|
|
|
|
**DatabaseError** { .api }
|
|
```python
|
|
class DatabaseError(OSError)
|
|
```
|
|
Error when executing SQL with bad syntax or database errors.
|
|
|
|
```python
|
|
from sqlite3 import connect
|
|
conn = connect(':memory:')
|
|
pd.read_sql('select * test', conn) # DatabaseError: Execution failed on sql
|
|
```
|
|
|
|
**PossibleDataLossError** { .api }
|
|
```python
|
|
class PossibleDataLossError(Exception)
|
|
```
|
|
Exception when trying to open an HDFStore file that's already opened with a different mode.
|
|
|
|
**ClosedFileError** { .api }
|
|
```python
|
|
class ClosedFileError(Exception)
|
|
```
|
|
Exception when performing operations on a closed HDFStore file.
|
|
|
|
### Clipboard and System Integration
|
|
|
|
**PyperclipException** { .api }
|
|
```python
|
|
class PyperclipException(RuntimeError)
|
|
```
|
|
Exception for unsupported clipboard functionality in `to_clipboard()` and `read_clipboard()`.
|
|
|
|
**PyperclipWindowsException** { .api }
|
|
```python
|
|
class PyperclipWindowsException(PyperclipException)
|
|
```
|
|
Windows-specific exception when clipboard access is denied due to other processes.
|
|
|
|
## File Format Warnings
|
|
|
|
### HDF5 and Storage Warnings
|
|
|
|
**IncompatibilityWarning** { .api }
|
|
```python
|
|
class IncompatibilityWarning(Warning)
|
|
```
|
|
Warning raised when trying to use `where` criteria on an incompatible HDF5 file.
|
|
|
|
**AttributeConflictWarning** { .api }
|
|
```python
|
|
class AttributeConflictWarning(Warning)
|
|
```
|
|
Warning when index attributes conflict during HDFStore operations.
|
|
|
|
### Stata File Warnings
|
|
|
|
**PossiblePrecisionLoss** { .api }
|
|
```python
|
|
class PossiblePrecisionLoss(Warning)
|
|
```
|
|
Warning when `to_stata()` converts int64 values to float64 due to range limitations.
|
|
|
|
**ValueLabelTypeMismatch** { .api }
|
|
```python
|
|
class ValueLabelTypeMismatch(Warning)
|
|
```
|
|
Warning when Stata export encounters non-string category values.
|
|
|
|
**InvalidColumnName** { .api }
|
|
```python
|
|
class InvalidColumnName(Warning)
|
|
```
|
|
Warning when column names are invalid Stata variables and need conversion.
|
|
|
|
**CategoricalConversionWarning** { .api }
|
|
```python
|
|
class CategoricalConversionWarning(Warning)
|
|
```
|
|
Warning raised when reading a partially labeled Stata file using an iterator.
|
|
|
|
### Style and Formatting Warnings
|
|
|
|
**CSSWarning** { .api }
|
|
```python
|
|
class CSSWarning(UserWarning)
|
|
```
|
|
Warning when CSS styling conversion fails or encounters unhandled formats.
|
|
|
|
```python
|
|
df = pd.DataFrame({'A': [1, 1, 1]})
|
|
df.style.applymap(lambda x: 'background-color: blueGreenRed;').to_excel('styled.xlsx')
|
|
# CSSWarning: Unhandled color format: 'blueGreenRed'
|
|
```
|
|
|
|
## Development and Internal Errors
|
|
|
|
### Abstract Method and Development Errors
|
|
|
|
**AbstractMethodError** { .api }
|
|
```python
|
|
class AbstractMethodError(NotImplementedError)
|
|
def __init__(self, class_instance, methodtype: str = "method") -> None
|
|
```
|
|
Error for abstract methods that must be implemented in concrete classes. Supports different method types: 'method', 'classmethod', 'staticmethod', 'property'.
|
|
|
|
```python
|
|
class Foo:
|
|
@classmethod
|
|
def classmethod(cls):
|
|
raise pd.errors.AbstractMethodError(cls, methodtype="classmethod")
|
|
|
|
def method(self):
|
|
raise pd.errors.AbstractMethodError(self)
|
|
|
|
Foo.classmethod() # AbstractMethodError: This classmethod must be defined in the concrete class Foo
|
|
```
|
|
|
|
### Internal Implementation Errors
|
|
|
|
**InvalidComparison** { .api }
|
|
```python
|
|
class InvalidComparison(Exception)
|
|
```
|
|
Internal exception for invalid comparison operations (internal use only).
|
|
|
|
**LossySetitemError** { .api }
|
|
```python
|
|
class LossySetitemError(Exception)
|
|
```
|
|
Internal exception for non-lossless `__setitem__` operations on numpy arrays (internal use only).
|
|
|
|
**NoBufferPresent** { .api }
|
|
```python
|
|
class NoBufferPresent(Exception)
|
|
```
|
|
Internal exception signaling absence of requested buffer in `_get_data_buffer` (internal use only).
|
|
|
|
## Configuration Errors
|
|
|
|
### Options and Configuration
|
|
|
|
**OptionError** { .api }
|
|
```python
|
|
class OptionError(AttributeError)
|
|
```
|
|
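Exception raised for pandas configuration option errors (imported from pandas._config.config). In pandas the class also derives from `KeyError`, so existing `except KeyError` handlers around option access continue to work.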
|
|
|
|
**InvalidVersion** { .api }
|
|
```python
|
|
class InvalidVersion(ValueError)
|
|
```
|
|
Exception for invalid version strings (imported from pandas.util.version).
|
|
|
|
## Type Definitions
|
|
|
|
```python
|
|
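# Error categories (illustrative groupings for this document only: these aliases are not exported by pandas.errors, they require `from typing import Union`, and the `IndexError` alias would shadow the Python builtin if executed)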
|
|
DataTypeError = Union[IntCastingNaNError, DtypeWarning]
|
|
ParsingError = Union[ParserError, ParserWarning, EmptyDataError]
|
|
IndexError = Union[UnsortedIndexError, InvalidIndexError, IndexingError, DuplicateLabelError]
|
|
CopyWarning = Union[PerformanceWarning, SettingWithCopyWarning, SettingWithCopyError, ChainedAssignmentError]
|
|
ComputationError = Union[DataError, SpecificationError, MergeError]
|
|
TimeSeriesError = Union[NullFrequencyError, OutOfBoundsDatetime, OutOfBoundsTimedelta]
|
|
EngineError = Union[NumbaUtilError, NumExprClobberingError, UndefinedVariableError, UnsupportedFunctionCall]
|
|
FileFormatError = Union[DatabaseError, PossibleDataLossError, ClosedFileError, PyperclipException]
|
|
FormatWarning = Union[IncompatibilityWarning, AttributeConflictWarning, PossiblePrecisionLoss, ValueLabelTypeMismatch, InvalidColumnName, CategoricalConversionWarning, CSSWarning]
|
|
DevelopmentError = Union[AbstractMethodError, InvalidComparison, LossySetitemError, NoBufferPresent]
|
|
ConfigurationError = Union[OptionError, InvalidVersion]
|
|
|
|
# All pandas errors and warnings
|
|
PandasError = Union[
|
|
DataTypeError, ParsingError, IndexError, CopyWarning, ComputationError,
|
|
TimeSeriesError, EngineError, FileFormatError, FormatWarning,
|
|
DevelopmentError, ConfigurationError
|
|
]
|
|
```
|
|
|
|
The `pandas.errors` module provides comprehensive error handling for all aspects of pandas operations, from basic data type conversions to complex multi-index operations and file I/O. Understanding these error types helps in writing robust pandas applications with proper exception handling and performance optimization.