codeflash-agent/.tessl/tiles/tessl/pypi-pandas/docs/core-data-structures.md
codeflash-ci-bot[bot] c249bcd0ce
chore: update tessl tiles 2026-04-23 (#35)
Co-authored-by: codeflash-ci-bot[bot] <codeflash-ci-bot[bot]@users.noreply.github.com>
2026-04-23 08:15:44 -05:00

14 KiB

Core Data Structures

DataFrame, Series, and the various Index types are the fundamental data structures of pandas. Every data manipulation operation in the library is built on top of them.

Core Imports

import pandas as pd
from pandas import DataFrame, Series, Index

Capabilities

DataFrame

Two-dimensional labeled data structure with heterogeneous columns, similar to a spreadsheet or SQL table. The primary pandas data structure for most use cases.

class DataFrame:
    """
    Two-dimensional labeled table (API stub).

    This listing documents signatures only; none of the members below carry
    an implementation. Members decorated with ``@property`` are attributes
    and must be read without parentheses (``df.shape``, not ``df.shape()``).
    """

    def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None):
        """
        Two-dimensional, size-mutable, potentially heterogeneous tabular data.

        Parameters:
        - data: dict, list, ndarray, Series, or DataFrame
        - index: Index or array-like, row labels
        - columns: Index or array-like, column labels
        - dtype: data type to force
        - copy: bool, copy data from inputs
        """

    # ------------------------------------------------------------------
    # Inspection
    # ------------------------------------------------------------------

    def head(self, n=5):
        """Return the first n rows."""

    def tail(self, n=5):
        """Return the last n rows."""

    def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, show_counts=None, null_counts=None):
        """
        Print concise summary of DataFrame.

        Note: ``null_counts`` is the legacy spelling of ``show_counts`` and
        was removed in pandas 2.0 (verify against the pandas version this
        listing targets).
        """

    def describe(self, percentiles=None, include=None, exclude=None):
        """Generate descriptive statistics."""

    # ------------------------------------------------------------------
    # Attributes (read without calling)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Tuple of (rows, columns)."""

    @property
    def size(self):
        """Number of elements."""

    @property
    def columns(self):
        """Column labels."""

    @property
    def index(self):
        """Row labels."""

    @property
    def dtypes(self):
        """Data types of columns."""

    @property
    def values(self):
        """NumPy representation of DataFrame."""

    @property
    def empty(self):
        """True if DataFrame is empty."""

    # ------------------------------------------------------------------
    # Copying and type conversion
    # ------------------------------------------------------------------

    def copy(self, deep=True):
        """Make a copy of DataFrame."""

    def select_dtypes(self, include=None, exclude=None):
        """Select columns based on data types."""

    def astype(self, dtype, copy=True, errors='raise'):
        """Cast DataFrame to specified dtype."""

    # ------------------------------------------------------------------
    # Sorting, dropping and missing data
    # ------------------------------------------------------------------

    def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):
        """Sort by values along axis."""

    def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):
        """Sort by labels along axis."""

    def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
        """Drop specified labels from rows or columns."""

    def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False):
        """Remove duplicate rows."""

    def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False):
        """Remove missing values."""

    def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Grouping and function application
    # ------------------------------------------------------------------

    def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):
        """
        Group DataFrame by one or more columns.

        Note: ``squeeze`` was removed in pandas 2.0 (verify against the
        pandas version this listing targets).
        """

    def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwargs):
        """Apply function along axis."""

    def applymap(self, func, na_action=None, **kwargs):
        """
        Apply function element-wise.

        Note: deprecated since pandas 2.1 in favour of ``DataFrame.map``
        (verify against the pandas version this listing targets).
        """

    def aggregate(self, func, axis=0, *args, **kwargs):
        """Aggregate using one or more operations."""

    def transform(self, func, axis=0, *args, **kwargs):
        """Transform using one or more operations."""

    # ------------------------------------------------------------------
    # Index manipulation
    # ------------------------------------------------------------------

    def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False):
        """Set DataFrame index using existing columns."""

    def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''):
        """Reset index to default integer index."""

    def reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=None, limit=None, tolerance=None):
        """Conform DataFrame to new index."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator=None, chunksize=None, date_format=None, doublequote=True, escapechar=None, decimal='.', errors='strict', storage_options=None):
        """
        Write DataFrame to CSV file.

        Note: ``line_terminator`` was renamed ``lineterminator`` in pandas
        1.5 and the old spelling was removed in pandas 2.0 (verify against
        the pandas version this listing targets).
        """

    def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=None, freeze_panes=None, storage_options=None):
        """Write DataFrame to Excel file."""

    def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True, indent=None, storage_options=None):
        """Write DataFrame to JSON."""

    def to_dict(self, orient='dict', into=dict):
        """Convert DataFrame to dictionary."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert DataFrame to NumPy array."""

Series

One-dimensional labeled array capable of holding any data type. The basic building block of pandas data structures.

class Series:
    """
    One-dimensional labeled array (API stub).

    This listing documents signatures only; none of the members below carry
    an implementation. Members decorated with ``@property`` are attributes
    and must be read without parentheses (``s.dtype``, not ``s.dtype()``).
    """

    def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False):
        """
        One-dimensional ndarray with axis labels.

        Parameters:
        - data: array-like, dict, or scalar value
        - index: array-like or Index, labels for the data
        - dtype: data type for the series
        - name: name for the Series
        - copy: bool, copy input data
        - fastpath: not described by this listing (presumably an internal
          flag; confirm before relying on it)
        """

    # ------------------------------------------------------------------
    # Inspection
    # ------------------------------------------------------------------

    def head(self, n=5):
        """Return the first n values."""

    def tail(self, n=5):
        """Return the last n values."""

    def describe(self, percentiles=None, include=None, exclude=None):
        """Generate descriptive statistics."""

    # ------------------------------------------------------------------
    # Attributes (read without calling)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Tuple of shape."""

    @property
    def size(self):
        """Number of elements."""

    @property
    def index(self):
        """Series index (labels)."""

    @property
    def values(self):
        """NumPy representation of Series."""

    @property
    def dtype(self):
        """Data type of Series."""

    @property
    def name(self):
        """Name of Series."""

    @property
    def empty(self):
        """True if Series is empty."""

    # ------------------------------------------------------------------
    # Copying and type conversion
    # ------------------------------------------------------------------

    def copy(self, deep=True):
        """Make a copy of Series."""

    def astype(self, dtype, copy=True, errors='raise'):
        """Cast Series to specified dtype."""

    # ------------------------------------------------------------------
    # Sorting, dropping and missing data
    # ------------------------------------------------------------------

    def sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None):
        """Sort by values."""

    def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None):
        """Sort by index labels."""

    def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'):
        """Drop specified labels."""

    def drop_duplicates(self, keep='first', inplace=False):
        """Remove duplicate values."""

    def dropna(self, axis=0, inplace=False, how=None):
        """Remove missing values."""

    def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Function application and grouping
    # ------------------------------------------------------------------

    def apply(self, func, convert_dtype=True, args=(), **kwargs):
        """Apply function to Series values."""

    def map(self, arg, na_action=None):
        """Map values using input mapping or function."""

    def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True):
        """
        Group Series by values.

        Note: ``squeeze`` was removed in pandas 2.0 (verify against the
        pandas version this listing targets).
        """

    # ------------------------------------------------------------------
    # Uniqueness and counting
    # ------------------------------------------------------------------

    def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        """Count unique values."""

    def unique(self):
        """Return unique values."""

    def nunique(self, dropna=True):
        """Count number of unique values."""

    # ------------------------------------------------------------------
    # Reductions
    #
    # Note: the ``level`` argument of the reductions below was removed in
    # pandas 2.0 (verify against the pandas version this listing targets).
    # ------------------------------------------------------------------

    def mean(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return mean of values."""

    def median(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return median of values."""

    def std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
        """Return standard deviation."""

    def var(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
        """Return variance."""

    def sum(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
        """Return sum of values."""

    def min(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return minimum value."""

    def max(self, axis=None, skipna=True, level=None, numeric_only=None):
        """Return maximum value."""

    def count(self, level=None):
        """Count non-missing values."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def to_dict(self, into=dict):
        """Convert Series to dictionary."""

    def to_list(self):
        """Convert Series to list."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert Series to NumPy array."""

Index

Immutable sequence used for indexing and alignment in pandas data structures.

class Index:
    """
    Immutable sequence of labels (API stub).

    This listing documents signatures only; none of the members below carry
    an implementation. Members decorated with ``@property`` are attributes
    and must be read without parentheses (``idx.name``, not ``idx.name()``).
    """

    def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True):
        """
        Immutable sequence used for indexing and alignment.

        Parameters:
        - data: array-like, sequence of labels
        - dtype: data type for the index
        - copy: bool, copy input data
        - name: name for the Index
        - tupleize_cols: not described by this listing (presumably controls
          whether a list of tuples becomes a MultiIndex; confirm)
        """

    # ------------------------------------------------------------------
    # Attributes (read without calling)
    # ------------------------------------------------------------------

    @property
    def shape(self):
        """Tuple of shape."""

    @property
    def size(self):
        """Number of elements."""

    @property
    def dtype(self):
        """Data type of Index."""

    @property
    def name(self):
        """Name of Index."""

    @property
    def names(self):
        """Names of levels (for MultiIndex)."""

    @property
    def values(self):
        """NumPy representation of Index."""

    @property
    def empty(self):
        """True if Index is empty."""

    # ------------------------------------------------------------------
    # Copying and type conversion
    # ------------------------------------------------------------------

    def copy(self, name=None, deep=False):
        """Make a copy of Index."""

    def astype(self, dtype, copy=True):
        """Cast Index to specified dtype."""

    # ------------------------------------------------------------------
    # Sorting, dropping and missing data
    # ------------------------------------------------------------------

    def sort_values(self, return_indexer=False, ascending=True, na_position='last', key=None):
        """Sort Index values."""

    def drop(self, labels, errors='raise'):
        """Drop specified labels from Index."""

    def drop_duplicates(self, keep='first'):
        """Remove duplicate values."""

    def dropna(self, how='any'):
        """Remove missing values."""

    def fillna(self, value=None, downcast=None):
        """Fill missing values."""

    # ------------------------------------------------------------------
    # Uniqueness and counting
    # ------------------------------------------------------------------

    def unique(self, level=None):
        """Return unique values."""

    def nunique(self, dropna=True):
        """Count number of unique values."""

    def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True):
        """Count unique values."""

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def to_list(self):
        """Convert Index to list."""

    def to_numpy(self, dtype=None, copy=False, na_value=None):
        """Convert Index to NumPy array."""

    def to_series(self, index=None, name=None):
        """Convert Index to Series."""

Specialized Index Types

class RangeIndex(Index):
    """Immutable Index implementing a monotonic integer range."""

    def __init__(
        self,
        start=None,
        stop=None,
        step=None,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

class CategoricalIndex(Index):
    """Index based on an underlying Categorical."""

    def __init__(
        self,
        data=None,
        categories=None,
        ordered=None,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

class MultiIndex(Index):
    """Multi-level or hierarchical index object."""

    def __init__(
        self,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy=False,
        name=None,
        verify_integrity=True,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

class IntervalIndex(Index):
    """Index for intervals that are closed on the same side."""

    def __init__(
        self,
        data,
        closed=None,
        dtype=None,
        copy=False,
        name=None,
        verify_integrity=True,
    ):
        """Signature-only constructor stub; ``data`` is the only required argument."""

class DatetimeIndex(Index):
    """Index for datetime64 data."""

    def __init__(
        self,
        data=None,
        freq=None,
        tz=None,
        normalize=False,
        closed=None,
        ambiguous='raise',
        dayfirst=False,
        yearfirst=False,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

class TimedeltaIndex(Index):
    """Index for timedelta64 data."""

    def __init__(
        self,
        data=None,
        unit=None,
        freq=None,
        closed=None,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

class PeriodIndex(Index):
    """Index for Period data."""

    def __init__(
        self,
        data=None,
        ordinal=None,
        freq=None,
        dtype=None,
        copy=False,
        name=None,
    ):
        """Signature-only constructor stub; the listing carries no implementation."""

Types

# Index slicing helper.
# Declared here as a bare annotation: the listing gives it a name and the
# generic type `object`, but assigns no value.
IndexSlice: object  # Slicing helper for MultiIndex

# Grouper for groupby operations
class Grouper:
    """Grouping specification passed to groupby operations (signature-only stub)."""

    def __init__(
        self,
        key=None,
        level=None,
        freq=None,
        axis=0,
        sort=False,
        closed=None,
        label=None,
        how='mean',
        fill_method=None,
        limit=None,
        group_keys=True,
        origin='start_day',
        offset=None,
        dropna=True,
    ):
        """Accept the grouping options listed above; the listing carries no implementation."""

# Named aggregation helper
class NamedAgg:
    """Pairing of a column with an aggregation function (signature-only stub)."""

    def __init__(
        self,
        column,
        aggfunc,
    ):
        """Take the two required arguments ``column`` and ``aggfunc``; no implementation is listed."""

# Flags for pandas objects
class Flags:
    """Flags attached to a pandas object (annotation-only stub)."""

    # Declared without a value: the listing gives only the attribute's
    # name and its type.
    allows_duplicate_labels: bool