codeflash-agent/.tessl/tiles/tessl/pypi-pandas/docs/statistics-math.md
codeflash-ci-bot[bot] c249bcd0ce
chore: update tessl tiles 2026-04-23 (#35)
Co-authored-by: codeflash-ci-bot[bot] <codeflash-ci-bot[bot]@users.noreply.github.com>
2026-04-23 08:15:44 -05:00

19 KiB

Statistical and Mathematical Operations

Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations.

Core Imports

import pandas as pd
from pandas import cut, qcut, factorize, value_counts

Capabilities

Descriptive Statistics

Core statistical functions available on DataFrame and Series objects.

# These are methods available on DataFrame and Series:

# Central tendency
def mean(axis=None, skipna=True, level=None, numeric_only=None):
    """Compute the mean of the values along the requested axis."""

def median(axis=None, skipna=True, level=None, numeric_only=None):
    """Compute the median of the values along the requested axis."""

def mode(axis=0, numeric_only=False, dropna=True):
    """Get the mode(s) of each element along the selected axis."""

# Measures of spread
def std(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Compute the sample standard deviation along the requested axis."""

def var(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Compute the unbiased variance along the requested axis."""

def sem(axis=None, skipna=True, level=None, ddof=1, numeric_only=None):
    """Compute the unbiased standard error of the mean along the requested axis."""

def mad(axis=None, skipna=True, level=None):
    """Compute the mean absolute deviation of the values along the requested axis.

    NOTE(review): believed deprecated in pandas 1.5 and removed in 2.0 — confirm
    against the pandas version this document targets.
    """

# Distribution shape
def skew(axis=None, skipna=True, level=None, numeric_only=None):
    """Compute the unbiased skew along the requested axis."""

def kurt(axis=None, skipna=True, level=None, numeric_only=None):
    """Compute the unbiased kurtosis along the requested axis."""

def kurtosis(axis=None, skipna=True, level=None, numeric_only=None):
    """Compute the unbiased kurtosis along the requested axis (same as kurt)."""

# Extremes
def min(axis=None, skipna=True, level=None, numeric_only=None):
    """Find the minimum of the values along the requested axis."""

def max(axis=None, skipna=True, level=None, numeric_only=None):
    """Find the maximum of the values along the requested axis."""

def idxmin(axis=0, skipna=True):
    """Give the index of the first occurrence of the minimum along the requested axis."""

def idxmax(axis=0, skipna=True):
    """Give the index of the first occurrence of the maximum along the requested axis."""

# Aggregation
def sum(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Add up the values along the requested axis."""

def prod(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Multiply together the values along the requested axis."""

def product(axis=None, skipna=True, level=None, numeric_only=None, min_count=0):
    """Multiply together the values along the requested axis (same as prod)."""

def count(axis=0, level=None, numeric_only=False):
    """Count the non-NA cells in each column or row."""

def nunique(axis=0, dropna=True):
    """Count the distinct elements along the specified axis."""

# Quantiles and percentiles
def quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear', method='single'):
    """Compute the values at the given quantile along the requested axis."""

def describe(percentiles=None, include=None, exclude=None):
    """Produce descriptive statistics."""

# Cumulative operations
def cumsum(axis=None, skipna=True):
    """Compute the cumulative sum along a DataFrame or Series axis."""

def cumprod(axis=None, skipna=True):
    """Compute the cumulative product along a DataFrame or Series axis."""

def cummax(axis=None, skipna=True):
    """Compute the cumulative maximum along a DataFrame or Series axis."""

def cummin(axis=None, skipna=True):
    """Compute the cumulative minimum along a DataFrame or Series axis."""

Correlation and Covariance

Functions to compute relationships between variables.

# These are methods available on DataFrame and Series:

def corr(method='pearson', min_periods=1, numeric_only=True):
    """Calculate the pairwise correlation between columns.

    Parameters:
    - method: str, which correlation to use ('pearson', 'kendall', 'spearman')
    - min_periods: int, fewest observations needed for a valid result
    - numeric_only: bool, restrict the computation to numeric columns

    Returns:
    DataFrame holding the correlation matrix
    """

def cov(min_periods=None, ddof=1, numeric_only=True):
    """Calculate the pairwise covariance between columns.

    Parameters:
    - min_periods: int, fewest observations needed for a valid result
    - ddof: int, delta degrees of freedom
    - numeric_only: bool, restrict the computation to numeric columns

    Returns:
    DataFrame holding the covariance matrix
    """

def corrwith(other, axis=0, drop=False, method='pearson', numeric_only=True):
    """Calculate pairwise correlation with another object.

    Parameters:
    - other: DataFrame, Series, or array-like
    - axis: int, which axis to use (0 or 1)
    - drop: bool, remove missing indices from the result
    - method: str, which correlation to use ('pearson', 'kendall', 'spearman')
    - numeric_only: bool, restrict the computation to numeric columns

    Returns:
    Series of correlations
    """

Mathematical Operations

Element-wise mathematical functions and operations.

# These are methods available on DataFrame and Series:

def abs():
    """Produce a Series/DataFrame holding the absolute numeric value of each element."""

def round(decimals=0):
    """Round every value to the requested number of decimals."""

def clip(lower=None, upper=None, axis=None, inplace=False):
    """Limit values to the given input threshold(s)."""

def rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False):
    """Rank the numerical data along an axis.

    Parameters:
    - axis: int, axis along which to rank
    - method: str, ranking scheme ('average', 'min', 'max', 'first', 'dense')
    - numeric_only: bool, restrict ranking to numeric columns
    - na_option: str, treatment of NaN values ('keep', 'top', 'bottom')
    - ascending: bool, whether to rank in ascending order
    - pct: bool, whether to return percentile rank

    Returns:
    Data ranks, with the same type as the caller
    """

# Exponential and logarithmic functions (applied as NumPy ufuncs, e.g. np.exp(obj), not called as methods; pow is the exception and is a regular method)
def exp():
    """Compute the exponential of the elements."""

def log():
    """Compute the natural logarithm of the elements."""

def log10():
    """Compute the base-10 logarithm of the elements."""

def log2():
    """Compute the base-2 logarithm of the elements."""

def sqrt():
    """Compute the square root of the elements."""

def pow(other):
    """Compute the exponential power of the elements."""

# Trigonometric functions (applied as NumPy ufuncs, e.g. np.sin(obj), not called as methods)
def sin():
    """Compute the sine of the elements."""

def cos():
    """Compute the cosine of the elements."""

def tan():
    """Compute the tangent of the elements."""

def arcsin():
    """Compute the inverse sine of the elements."""

def arccos():
    """Compute the inverse cosine of the elements."""

def arctan():
    """Compute the inverse tangent of the elements."""

Comparison Operations

Functions for element-wise comparison, range checks, and membership tests.

# These are methods available on DataFrame and Series:

def eq(other, axis='columns', level=None):
    """Element-wise "equal to" comparison of dataframe and other (binary operator ==)."""

def ne(other, axis='columns', level=None):
    """Element-wise "not equal to" comparison of dataframe and other (binary operator !=)."""

def lt(other, axis='columns', level=None):
    """Element-wise "less than" comparison of dataframe and other (binary operator <)."""

def le(other, axis='columns', level=None):
    """Element-wise "less than or equal to" comparison of dataframe and other (binary operator <=)."""

def gt(other, axis='columns', level=None):
    """Element-wise "greater than" comparison of dataframe and other (binary operator >)."""

def ge(other, axis='columns', level=None):
    """Element-wise "greater than or equal to" comparison of dataframe and other (binary operator >=)."""

def between(left, right, inclusive='both'):
    """Build a boolean Series equivalent to left <= series <= right.

    Parameters:
    - left: scalar or list-like, the left boundary
    - right: scalar or list-like, the right boundary
    - inclusive: str, which boundaries to include ('both', 'neither', 'left', 'right')

    Returns:
    Series of boolean values
    """

def isin(values):
    """Test whether each element of the Series/DataFrame is contained in values.

    Parameters:
    - values: set or list-like, the sequence of values to test against

    Returns:
    Series/DataFrame of bools
    """

Top-Level Statistical Functions

Standalone statistical functions that operate on array-like data.

def cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise', ordered=True):
    """Sort values into discrete interval bins.

    Parameters:
    - x: array-like, the input array to bin
    - bins: int, sequence of scalars, or IntervalIndex
    - right: bool, whether the bins include their rightmost edge
    - labels: array or bool, labels for the returned bins
    - retbins: bool, whether to return the bins
    - precision: int, precision used for the bin labels
    - include_lowest: bool, whether the first interval is left-inclusive
    - duplicates: str, what to do with non-unique bin edges ('raise' or 'drop')
    - ordered: bool, whether the returned Categorical is ordered

    Returns:
    Categorical, Series, or array
    """

def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
    """Discretize values into quantile-based bins.

    Parameters:
    - x: array-like, the input array to bin
    - q: int or list-like of float, the quantiles to compute
    - labels: array or bool, labels for the returned bins
    - retbins: bool, whether to return (bins, labels)
    - precision: int, precision used for the bin labels
    - duplicates: str, what to do with non-unique bin edges ('raise' or 'drop')

    Returns:
    Categorical, Series, or array
    """

def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None):
    """Encode the object as an enumerated type or categorical variable.

    Parameters:
    - values: sequence, a 1-d array-like
    - sort: bool, whether to sort the uniques
    - na_sentinel: int, the value used for missing values
    - use_na_sentinel: bool, whether to use na_sentinel for missing values
    - size_hint: int, a hint for the hashtable size

    Returns:
    tuple of (codes, uniques)
    """

def unique(values):
    """Get the unique values, using a hash table.

    Parameters:
    - values: 1d array-like

    Returns:
    ndarray or ExtensionArray
    """

def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True):
    """Build a histogram of the values in a 1D array.

    Parameters:
    - values: 1d array-like
    - sort: bool, whether to sort by values
    - ascending: bool, whether to sort in ascending order
    - normalize: bool, whether to return relative frequencies
    - bins: int, group the values into half-open bins
    - dropna: bool, whether to exclude NaN values

    Returns:
    Series
    """

Numeric Conversion

Functions for converting data to numeric types.

def to_numeric(arg, errors='raise', downcast=None):
    """Turn the argument into a numeric type.

    Parameters:
    - arg: scalar, list, tuple, 1-d array, or Series
    - errors: str, how errors are handled ('raise', 'coerce', 'ignore')
    - downcast: str, downcast the resulting data ('integer', 'signed', 'unsigned', 'float')

    Returns:
    The converted numeric values
    """

Groupby Statistical Operations

Statistical methods available on GroupBy objects.

# Available on DataFrameGroupBy and SeriesGroupBy objects:

class GroupBy:
    """Statistical methods exposed by a GroupBy object."""

    def mean(self, numeric_only=True, engine=None, engine_kwargs=None):
        """Mean of each group."""

    def median(self, numeric_only=True):
        """Median of each group."""

    def sum(self, numeric_only=True, min_count=0, engine=None, engine_kwargs=None):
        """Sum of each group."""

    def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        """Minimum of each group."""

    def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None):
        """Maximum of each group."""

    def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
        """Standard deviation of each group."""

    def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True):
        """Variance of each group."""

    def count(self):
        """Count of each group."""

    def size(self):
        """Size of each group."""

    def nunique(self, dropna=True):
        """Number of unique values in each group."""

    def quantile(self, q=0.5, interpolation='linear', numeric_only=True):
        """Values at the given quantile for each group."""

    def describe(self, percentiles=None, include=None, exclude=None):
        """Descriptive statistics for each group."""

    def sem(self, ddof=1, numeric_only=True):
        """Standard error of the mean for each group."""

    def rank(self, method='average', ascending=True, na_option='keep', pct=False, axis=0):
        """Rank of the values within each group."""

    def cumcount(self, ascending=True):
        """Number the items of each group from 0 to the length of that group - 1."""

    def cumsum(self, axis=0, **kwargs):
        """Cumulative sum within each group."""

    def cumprod(self, axis=0, **kwargs):
        """Cumulative product within each group."""

    def cummax(self, axis=0, numeric_only=False, **kwargs):
        """Cumulative max within each group."""

    def cummin(self, axis=0, numeric_only=False, **kwargs):
        """Cumulative min within each group."""

    def skew(self, axis=0, skipna=True, numeric_only=True, **kwargs):
        """Unbiased skew within groups."""

    def kurt(self, axis=0, skipna=True, numeric_only=True, **kwargs):
        """Unbiased kurtosis within groups."""

    def mad(self, **kwargs):
        """Mean absolute deviation within groups.

        NOTE(review): believed deprecated in pandas 1.5 and removed in 2.0 —
        confirm against the pandas version this document targets.
        """

    def prod(self, numeric_only=True, min_count=0):
        """Product of the values in each group."""

    def ohlc(self):
        """Open, high, low and close values of a group."""

    def first(self, numeric_only=False, min_count=-1):
        """First value within each group."""

    def last(self, numeric_only=False, min_count=-1):
        """Last value within each group."""

    def nth(self, n, dropna=None):
        """The nth value, or a subset when n is a list."""

    def idxmax(self, axis=0, skipna=True):
        """Index of the maximum value within each group."""

    def idxmin(self, axis=0, skipna=True):
        """Index of the minimum value within each group."""

Advanced Statistical Functions

More specialized statistical operations and utilities.

# These are methods available on DataFrame and Series:

def pct_change(periods=1, fill_method='pad', limit=None, freq=None):
    """Compute the percentage change between the current and a prior element.

    Parameters:
    - periods: int, number of periods to shift when forming the percent change
    - fill_method: str, how NaNs are handled before percent changes are computed
    - limit: int, how many consecutive NaNs to fill before stopping
    - freq: DateOffset, Timedelta or str, the increment to use for the time rule

    Returns:
    Series/DataFrame of percentage changes
    """

def diff(periods=1, axis=0):
    """Compute the first discrete difference of the elements.

    Parameters:
    - periods: int, number of periods to shift when calculating the difference
    - axis: int, the axis to shift along

    Returns:
    Series/DataFrame of differences
    """

def shift(periods=1, freq=None, axis=0, fill_value=None):
    """Move the index by the desired number of periods.

    Parameters:
    - periods: int, how many periods to shift
    - freq: DateOffset, Timedelta, or str, the offset to use from the time series API
    - axis: int, the axis to shift
    - fill_value: object, the scalar value used for missing values

    Returns:
    Series/DataFrame holding the shifted data
    """

def expanding(min_periods=1, center=None, axis=0, method='single'):
    """
    Provide expanding window calculations.

    Parameters:
    - min_periods: int, minimum number of observations in window
    - center: bool, whether result should be centered
    - axis: int, axis along which to slide window
    - method: str, execution method ('single' or 'table'); 'single' operates
      per column or row, 'table' operates over the entire object

    Returns:
    Expanding object
    """

def rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None, method='single'):
    """Set up rolling window calculations.

    Parameters:
    - window: int, the size of the moving window
    - min_periods: int, fewest observations required in the window
    - center: bool, whether the result should be centered
    - win_type: str, the window type
    - on: str, datetime-like column to use in place of a DatetimeIndex
    - axis: int, the axis along which the window slides
    - closed: str, close the interval on 'right', 'left', 'both' or 'neither'
    - method: str, the execution method ('single' or 'table')

    Returns:
    Rolling object
    """

def ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=0, times=None, method='single'):
    """Set up exponentially weighted (EW) calculations.

    Parameters:
    - com: float, the center of mass
    - span: float, the span
    - halflife: float, the decay expressed as a half-life
    - alpha: float, the smoothing factor
    - min_periods: int, fewest observations required
    - adjust: bool, whether to divide by the decaying adjustment factor
    - ignore_na: bool, whether to ignore missing values
    - axis: int, the axis along which to calculate
    - times: array-like, the times corresponding to the observations
    - method: str, the execution method ('single' or 'table')

    Returns:
    ExponentialMovingWindow object
    """

Types

# Literal and Union come from the standard-library typing module; they are
# imported here so the alias definitions below run as written.
from typing import Literal, Union

# Statistical method options
StatMethod = Literal['average', 'min', 'max', 'first', 'dense']
CorrelationMethod = Literal['pearson', 'kendall', 'spearman']
InterpolationMethod = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']
# Same allowed values as InterpolationMethod.
QuantileInterpolation = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest']

# Ranking options
# Same allowed values as StatMethod.
RankMethod = Literal['average', 'min', 'max', 'first', 'dense']
RankNaOption = Literal['keep', 'top', 'bottom']

# Numeric conversion options
NumericErrors = Literal['raise', 'coerce', 'ignore']
DowncastOptions = Literal['integer', 'signed', 'unsigned', 'float']

# Binning options
BinningDuplicates = Literal['raise', 'drop']
IntervalInclusive = Literal['both', 'neither', 'left', 'right']

# Window calculation options
WindowMethod = Literal['single', 'table']
WindowType = Literal[
    'boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen',
    'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser',
    'gaussian', 'general_gaussian', 'slepian', 'exponential'
]

# Percentile inclusion options
# Same allowed values as IntervalInclusive.
PercentileInclusive = Literal['both', 'neither', 'left', 'right']

# Axis specification
AxisOption = Union[int, str, None]