mirror of
https://git.datalinker.icu/ali-vilab/TeaCache
synced 2025-12-09 21:04:25 +08:00
117 lines
3.3 KiB
Python
117 lines
3.3 KiB
Python
import cv2
|
|
import numpy as np
|
|
import torch
|
|
|
|
|
|
def ssim(img1, img2):
    """Return the mean structural similarity (SSIM) of two image planes.

    Both inputs are cast to float64 and smoothed with an 11x11 Gaussian
    window (sigma 1.5). Five pixels are cropped from every border of each
    filtered map so only positions with a fully supported window remain.

    Args:
        img1: First image plane; indexed as a 2-D array here.
        img2: Second image plane, same shape as ``img1``.

    Returns:
        The mean of the per-pixel SSIM map.
    """
    # NOTE(review): these stabilising constants look like the usual
    # (K * L) ** 2 terms with dynamic range L = 1, i.e. pixel values in
    # [0, 1] -- confirm against callers.
    c1 = 0.01**2
    c2 = 0.03**2

    first = img1.astype(np.float64)
    second = img2.astype(np.float64)

    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(gauss_1d, gauss_1d.transpose())

    def smooth(plane):
        # Gaussian-weighted local average, cropped to the "valid" region.
        return cv2.filter2D(plane, -1, window)[5:-5, 5:-5]

    mu1 = smooth(first)
    mu2 = smooth(second)
    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # Local variances and covariance via E[x*y] - E[x]*E[y].
    sigma1_sq = smooth(first**2) - mu1_sq
    sigma2_sq = smooth(second**2) - mu2_sq
    sigma12 = smooth(first * second) - mu1_mu2

    numerator = (2 * mu1_mu2 + c1) * (2 * sigma12 + c2)
    denominator = (mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2)
    return (numerator / denominator).mean()
|
|
|
|
|
|
def calculate_ssim_function(img1, img2):
    """Return the SSIM between two images, averaged over colour channels.

    Supported layouts:
      * 2-D arrays: scored directly as one plane.
      * 3-D channel-first arrays whose first axis has length 3: each
        channel is scored separately and the three scores are averaged.
      * 3-D channel-first arrays whose first axis has length 1: the
        singleton axes are squeezed away and the plane is scored.

    Args:
        img1: First image (array exposing ``shape`` and ``ndim``).
        img2: Second image; must have exactly the same shape as ``img1``.

    Returns:
        The SSIM score as a scalar.

    Raises:
        ValueError: If the shapes differ, or if the layout is not one of
            the supported ones above. Every unsupported layout raises, so
            the function never falls through and returns ``None``.
    """
    # [0,1]
    # ssim is the only metric extremely sensitive to gray being compared to b/w
    if img1.shape != img2.shape:
        raise ValueError("Input images must have the same dimensions.")

    if img1.ndim == 2:
        return ssim(img1, img2)

    if img1.ndim == 3:
        channels = img1.shape[0]
        if channels == 3:
            per_channel = [ssim(img1[i], img2[i]) for i in range(3)]
            return np.array(per_channel).mean()
        if channels == 1:
            return ssim(np.squeeze(img1), np.squeeze(img2))

    # Reached for any rank other than 2/3 and for 3-D inputs whose channel
    # count is neither 1 nor 3.
    raise ValueError("Wrong input image dimensions.")
|
|
|
|
|
|
def trans(x):
    """Return ``x`` untouched.

    Identity hook applied to the video batches before scoring; it performs
    no conversion of its own.
    """
    unchanged = x
    return unchanged
|
|
|
|
|
|
def calculate_ssim(videos1, videos2):
    """Compute per-frame SSIM statistics between two batches of videos.

    Every frame of ``videos1`` is compared with the frame at the same
    position in ``videos2``; for each frame index the scores are then
    aggregated across the batch.

    Args:
        videos1: Tensor laid out as [batch_size, timestamps, channel, h, w].
        videos2: Tensor with exactly the same shape as ``videos1``.

    Returns:
        A dict with:
          * ``"value"``: frame index -> mean SSIM across the batch.
          * ``"value_std"``: frame index -> standard deviation across the
            batch.
          * ``"video_setting"``: shape of a single video
            (timestamps, channel, h, w).
          * ``"video_setting_name"``: human-readable axis names.

    Raises:
        AssertionError: If the two batches differ in shape.
        ValueError: If the batch contains no videos (there is nothing to
            average over), or if a frame has an unsupported layout.
    """
    # videos [batch_size, timestamps, channel, h, w]
    assert videos1.shape == videos2.shape, "videos1 and videos2 must have the same shape"

    # Fail early and clearly instead of crashing later on an undefined
    # per-video variable when the batch is empty.
    if videos1.shape[0] == 0:
        raise ValueError("Input batches must contain at least one video.")

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    # Taken from the batch itself rather than from a variable leaked out of
    # the loop below.
    video_setting = videos1.shape[1:]
    num_frames = videos1.shape[1]

    # detach()/cpu() make the conversion work for tensors that track
    # gradients or live on an accelerator; for plain CPU tensors this is a
    # zero-copy view.
    batch1 = videos1.detach().cpu().numpy()
    batch2 = videos2.detach().cpu().numpy()

    # ssim_results[video, frame] -> SSIM of that frame pair.
    ssim_results = np.array(
        [
            [calculate_ssim_function(img1, img2) for img1, img2 in zip(video1, video2)]
            for video1, video2 in zip(batch1, batch2)
        ]
    )

    # Named ssim_mean (not "ssim") to avoid shadowing the module-level
    # ssim() function.
    ssim_mean = {}
    ssim_std = {}
    for clip_timestamp in range(num_frames):
        scores = ssim_results[:, clip_timestamp]
        ssim_mean[clip_timestamp] = np.mean(scores)
        ssim_std[clip_timestamp] = np.std(scores)

    result = {
        "value": ssim_mean,
        "value_std": ssim_std,
        "video_setting": video_setting,
        "video_setting_name": "time, channel, heigth, width",
    }

    return result
|
|
|
|
|
|
# Test code / usage example
|
|
|
|
|
|
def main():
    """Run calculate_ssim on two all-zero video batches and print the result.

    Serves as a smoke test and usage example: builds two identical CPU
    tensors shaped [videos, frames, channels, size, size], scores them, and
    prints the resulting dict as indented JSON.
    """
    import json

    number_of_videos = 8
    video_length = 50
    channel = 3
    size = 64

    shape = (number_of_videos, video_length, channel, size, size)
    videos1 = torch.zeros(*shape, requires_grad=False)
    videos2 = torch.zeros(*shape, requires_grad=False)
    # The original also built a torch.device("cuda") object and discarded it;
    # that had no effect, so it is dropped. Everything here runs on the CPU.

    result = calculate_ssim(videos1, videos2)
    print(json.dumps(result, indent=4))
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the usage example only when executed as a script, not on import.
    main()
|