mirror of
https://github.com/codeflash-ai/codeflash-agent.git
synced 2026-05-04 18:25:19 +00:00
493 lines
No EOL
16 KiB
Markdown
493 lines
No EOL
16 KiB
Markdown
# Image Processing
|
|
|
|
Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows.
|
|
|
|
## Capabilities
|
|
|
|
### Image Decoding and Encoding
|
|
|
|
Operations for reading and writing images in various formats.
|
|
|
|
```python { .api }
|
|
def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True):
|
|
"""
|
|
Detects whether an image is a BMP, GIF, JPEG, or PNG and dispatches to decode_bmp, decode_gif, decode_jpeg, or decode_png accordingly.
|
|
|
|
Parameters:
|
|
- contents: 0-D. The encoded image bytes
|
|
- channels: An optional int. Defaults to None, which is treated as 0 (use the number of channels in the encoded image). Number of color channels for the decoded image
|
|
- dtype: The desired DType of the returned Tensor
|
|
- name: A name for the operation
|
|
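- expand_animations: An optional bool. Defaults to True. Controls the shape of the returned op's output: if True, GIFs decode to a 4-D tensor and other formats to a 3-D tensor; if False, every format decodes to a 3-D tensor and animated GIFs are truncated to their first frame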
|
|
|
|
Returns:
|
|
Tensor with type dtype and a 3- or 4-dimensional shape
|
|
"""
|
|
|
|
def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True,
|
|
try_recover_truncated=False, acceptable_fraction=1,
|
|
dct_method="", name=None):
|
|
"""
|
|
Decode a JPEG-encoded image to a uint8 tensor.
|
|
|
|
Parameters:
|
|
- contents: A Tensor of type string. 0-D. The JPEG-encoded image
|
|
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
|
|
- ratio: An optional int. Defaults to 1. Downscaling ratio
|
|
- fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling
|
|
- try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input
|
|
- acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted
|
|
- dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor of type uint8
|
|
"""
|
|
|
|
def decode_png(contents, channels=0, dtype=tf.uint8, name=None):
|
|
"""
|
|
Decode a PNG-encoded image to a uint8 or uint16 tensor.
|
|
|
|
Parameters:
|
|
- contents: A Tensor of type string. 0-D. The PNG-encoded image
|
|
- channels: An optional int. Defaults to 0. Number of color channels for the decoded image
|
|
- dtype: An optional tf.DType from: tf.uint8, tf.uint16. Defaults to tf.uint8
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor of type dtype
|
|
"""
|
|
|
|
def encode_jpeg(image, format="", quality=95, progressive=False,
|
|
optimize_size=False, chroma_downsampling=True,
|
|
density_unit="in", x_density=300, y_density=300,
|
|
xmp_metadata="", name=None):
|
|
"""
|
|
JPEG-encode an image.
|
|
|
|
Parameters:
|
|
- image: A Tensor of type uint8. 3-D with shape [height, width, channels]
|
|
- format: An optional string from: "", "grayscale", "rgb". Defaults to ""
|
|
- quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100
|
|
- progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively
|
|
- optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change
|
|
- chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling
|
|
- density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density
|
|
- x_density: An optional int. Defaults to 300. Horizontal pixels per density unit
|
|
- y_density: An optional int. Defaults to 300. Vertical pixels per density unit
|
|
- xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor of type string
|
|
"""
|
|
|
|
def encode_png(image, compression=-1, name=None):
|
|
"""
|
|
PNG-encode an image.
|
|
|
|
Parameters:
|
|
- image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels]
|
|
- compression: An optional int. Defaults to -1. Compression level
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor of type string
|
|
"""
|
|
```
|
|
|
|
### Image Resizing and Cropping
|
|
|
|
Operations for resizing and cropping images.
|
|
|
|
```python { .api }
|
|
def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False,
|
|
antialias=False, name=None):
|
|
"""
|
|
Resize images to size using the specified method.
|
|
|
|
Parameters:
|
|
- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- size: A 1-D int32 Tensor of 2 elements: new_height, new_width
|
|
- method: An image.ResizeMethod, or string equivalent
|
|
- preserve_aspect_ratio: Whether to preserve the aspect ratio
|
|
- antialias: Whether to use an anti-aliasing filter when downsampling an image
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels]. If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]
|
|
"""
|
|
|
|
def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False):
|
|
"""
|
|
Resizes and pads an image to a target width and height.
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- target_height: Target height
|
|
- target_width: Target width
|
|
- method: An image.ResizeMethod, or string equivalent
|
|
- antialias: Whether to use an anti-aliasing filter when downsampling an image
|
|
|
|
Returns:
|
|
Resized and padded image
|
|
"""
|
|
|
|
def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width):
|
|
"""
|
|
Crops an image to a specified bounding box.
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- offset_height: Vertical coordinate of the top-left corner of the result in the input
|
|
- offset_width: Horizontal coordinate of the top-left corner of the result in the input
|
|
- target_height: Height of the result
|
|
- target_width: Width of the result
|
|
|
|
Returns:
|
|
Cropped image(s)
|
|
"""
|
|
|
|
def central_crop(image, central_fraction):
|
|
"""
|
|
Crop the central region of the image(s).
|
|
|
|
Parameters:
|
|
- image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]
|
|
- central_fraction: float in the range (0, 1]. Fraction of each spatial dimension to retain; the outer parts of the image are removed
|
|
|
|
Returns:
|
|
3-D / 4-D float Tensor, as per the input
|
|
"""
|
|
|
|
def random_crop(value, size, seed=None, name=None):
|
|
"""
|
|
Randomly crops a tensor to a given size.
|
|
|
|
Parameters:
|
|
- value: Input tensor to crop
|
|
- size: 1-D tensor with size the rank of value
|
|
- seed: A Python integer. Used to create a random seed
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
A cropped tensor of the same rank as value and shape size
|
|
"""
|
|
```
|
|
|
|
### Image Transformations
|
|
|
|
Geometric transformations and spatial manipulations.
|
|
|
|
```python { .api }
|
|
def flip_left_right(image):
|
|
"""
|
|
Flip an image horizontally (left to right).
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
|
|
Returns:
|
|
A tensor of the same type and shape as image
|
|
"""
|
|
|
|
def flip_up_down(image):
|
|
"""
|
|
Flip an image vertically (upside down).
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
|
|
Returns:
|
|
A tensor of the same type and shape as image
|
|
"""
|
|
|
|
def transpose(image, name=None):
|
|
"""
|
|
Transpose image(s) by swapping the height and width dimension.
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
A tensor of the same type and shape as image, transposed
|
|
"""
|
|
|
|
def rot90(image, k=1, name=None):
|
|
"""
|
|
Rotate image(s) counter-clockwise by 90 degrees.
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- k: A scalar integer tensor. The number of times the image is rotated by 90 degrees
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
A rotated tensor of the same type and shape as image
|
|
"""
|
|
|
|
def random_flip_left_right(image, seed=None):
|
|
"""
|
|
Randomly flip an image horizontally (left to right).
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- seed: A Python integer. Used to create a random seed
|
|
|
|
Returns:
|
|
A tensor of the same type and shape as image
|
|
"""
|
|
|
|
def random_flip_up_down(image, seed=None):
|
|
"""
|
|
Randomly flips an image vertically (upside down).
|
|
|
|
Parameters:
|
|
- image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- seed: A Python integer. Used to create a random seed
|
|
|
|
Returns:
|
|
A tensor of the same type and shape as image
|
|
"""
|
|
```
|
|
|
|
### Color Space and Enhancement
|
|
|
|
Operations for color manipulation and image enhancement.
|
|
|
|
```python { .api }
|
|
def rgb_to_grayscale(images, name=None):
|
|
"""
|
|
Converts one or more images from RGB to Grayscale.
|
|
|
|
Parameters:
|
|
- images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
The converted grayscale image(s)
|
|
"""
|
|
|
|
def grayscale_to_rgb(images, name=None):
|
|
"""
|
|
Converts one or more images from Grayscale to RGB.
|
|
|
|
Parameters:
|
|
- images: The Grayscale tensor to convert. Last dimension must be size 1
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
The converted RGB image(s)
|
|
"""
|
|
|
|
def rgb_to_hsv(images, name=None):
|
|
"""
|
|
Converts one or more images from RGB to HSV.
|
|
|
|
Parameters:
|
|
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor. Has the same type as images
|
|
"""
|
|
|
|
def hsv_to_rgb(images, name=None):
|
|
"""
|
|
Converts one or more images from HSV to RGB.
|
|
|
|
Parameters:
|
|
- images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
A Tensor. Has the same type as images
|
|
"""
|
|
|
|
def adjust_brightness(image, delta):
|
|
"""
|
|
Adjust the brightness of RGB or Grayscale images.
|
|
|
|
Parameters:
|
|
- image: RGB image or images to adjust
|
|
- delta: A scalar. Amount to add to the pixel values
|
|
|
|
Returns:
|
|
The brightness-adjusted image(s)
|
|
"""
|
|
|
|
def adjust_contrast(images, contrast_factor):
|
|
"""
|
|
Adjust contrast of RGB or grayscale images.
|
|
|
|
Parameters:
|
|
- images: Images to adjust. At least 3-D
|
|
- contrast_factor: A float multiplier for adjusting contrast
|
|
|
|
Returns:
|
|
The contrast-adjusted image or images
|
|
"""
|
|
|
|
def adjust_hue(image, delta, name=None):
|
|
"""
|
|
Adjust hue of RGB images.
|
|
|
|
Parameters:
|
|
- image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV and rotating the hue channel (H)
|
|
- delta: float. How much to add to the hue channel
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
The hue-adjusted image or images
|
|
"""
|
|
|
|
def adjust_saturation(image, saturation_factor, name=None):
|
|
"""
|
|
Adjust saturation of RGB images.
|
|
|
|
Parameters:
|
|
- image: RGB image or images. The image saturation is adjusted by converting the image to HSV and multiplying the saturation (S)
|
|
- saturation_factor: float. Factor to multiply the saturation by
|
|
- name: A name for this operation
|
|
|
|
Returns:
|
|
The saturation-adjusted image or images
|
|
"""
|
|
|
|
def random_brightness(image, max_delta, seed=None):
|
|
"""
|
|
Adjust the brightness of images by a random factor.
|
|
|
|
Parameters:
|
|
- image: An image or images to adjust
|
|
- max_delta: float, must be non-negative. The brightness delta is picked randomly in the interval [-max_delta, max_delta)
|
|
- seed: A Python integer. Used to create a random seed
|
|
|
|
Returns:
|
|
The brightness-adjusted image(s)
|
|
"""
|
|
|
|
def random_contrast(image, lower, upper, seed=None):
|
|
"""
|
|
Adjust the contrast of an image or images by a random factor.
|
|
|
|
Parameters:
|
|
- image: An image tensor with 3 or more dimensions
|
|
- lower: float. Lower bound for the random contrast factor
|
|
- upper: float. Upper bound for the random contrast factor
|
|
- seed: A Python integer. Used to create a random seed
|
|
|
|
Returns:
|
|
The contrast-adjusted tensor
|
|
"""
|
|
```
|
|
|
|
### Image Quality and Metrics
|
|
|
|
Operations for measuring image quality and computing metrics.
|
|
|
|
```python { .api }
|
|
def psnr(a, b, max_val, name=None):
|
|
"""
|
|
Returns the Peak Signal-to-Noise Ratio between a and b.
|
|
|
|
Parameters:
|
|
- a: First set of images
|
|
- b: Second set of images
|
|
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
|
|
- name: Namespace to embed the computation in
|
|
|
|
Returns:
|
|
The scalar PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]
|
|
"""
|
|
|
|
def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
|
|
"""
|
|
Computes SSIM index between img1 and img2.
|
|
|
|
Parameters:
|
|
- img1: First image batch
|
|
- img2: Second image batch
|
|
- max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
|
|
- filter_size: Default value 11 (size of gaussian filter)
|
|
- filter_sigma: Default value 1.5 (width of gaussian filter)
|
|
- k1: Default value 0.01
|
|
- k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)
|
|
|
|
Returns:
|
|
A tensor containing an SSIM value for each image in batch
|
|
"""
|
|
|
|
def total_variation(images, name=None):
|
|
"""
|
|
Calculate and return the total variation for one or more images.
|
|
|
|
Parameters:
|
|
- images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
|
|
- name: A name for the operation
|
|
|
|
Returns:
|
|
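The total variation of images. If images was 4-D, a 1-D float Tensor of shape [batch] with the total variation for each image in the batch; if images was 3-D, a scalar float with the total variation for that image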
|
|
"""
|
|
```
|
|
|
|
## Usage Examples
|
|
|
|
```python
|
|
import tensorflow as tf
|
|
import numpy as np
|
|
|
|
# Read and decode images
|
|
image_string = tf.io.read_file('path/to/image.jpg')
|
|
image = tf.image.decode_jpeg(image_string, channels=3)
|
|
|
|
# Resize image
|
|
resized_image = tf.image.resize(image, [224, 224])
|
|
|
|
# Random augmentations
|
|
augmented_image = tf.image.random_flip_left_right(image)
|
|
augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)
|
|
augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)
|
|
|
|
# Crop operations
|
|
central_cropped = tf.image.central_crop(image, central_fraction=0.8)
|
|
random_cropped = tf.image.random_crop(image, size=[100, 100, 3])
|
|
|
|
# Color space conversions
|
|
grayscale = tf.image.rgb_to_grayscale(image)
|
|
hsv_image = tf.image.rgb_to_hsv(image)
|
|
|
|
# Image processing pipeline for training
|
|
def preprocess_image(image_path, label):
|
|
image = tf.io.read_file(image_path)
|
|
image = tf.image.decode_jpeg(image, channels=3)
|
|
image = tf.image.resize(image, [224, 224])
|
|
image = tf.cast(image, tf.float32) / 255.0
|
|
|
|
# Data augmentation
|
|
image = tf.image.random_flip_left_right(image)
|
|
image = tf.image.random_brightness(image, max_delta=0.1)
|
|
image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
|
|
|
|
return image, label
|
|
|
|
# Batch processing
|
|
batch_size = 32
|
|
image_paths = ["path1.jpg", "path2.jpg", ...] # List of image paths
|
|
labels = [0, 1, ...] # Corresponding labels
|
|
|
|
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
|
|
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
|
|
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
|
|
|
|
# Quality metrics
|
|
img1 = tf.random.uniform([1, 256, 256, 3])
|
|
img2 = tf.random.uniform([1, 256, 256, 3])
|
|
|
|
psnr_value = tf.image.psnr(img1, img2, max_val=1.0)
|
|
ssim_value = tf.image.ssim(img1, img2, max_val=1.0)
|
|
|
|
print(f"PSNR: {psnr_value.numpy()}")
|
|
print(f"SSIM: {ssim_value.numpy()}")
|
|
``` |