bg_replace_video.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import math
  16. import time
  17. from collections.abc import Iterable
  18. import cv2
  19. import numpy as np
  20. import paddle
  21. import paddle.nn.functional as F
  22. from paddleseg import utils
  23. from paddleseg.core import infer
  24. from paddleseg.utils import logger, progbar, TimeAverager
  25. import ppmatting.transforms as T
  26. from ppmatting.utils import mkdir, estimate_foreground_ml, VideoReader, VideoWriter
  27. def build_loader_writter(video_path, transforms, save_dir):
  28. reader = VideoReader(video_path, transforms)
  29. loader = paddle.io.DataLoader(reader)
  30. base_name = os.path.basename(video_path)
  31. name = os.path.splitext(base_name)[0]
  32. save_path = os.path.join(save_dir, name + '.avi')
  33. writer = VideoWriter(
  34. save_path,
  35. reader.fps,
  36. frame_size=(reader.width, reader.height),
  37. is_color=True)
  38. return loader, writer
  39. def reverse_transform(img, trans_info):
  40. """recover pred to origin shape"""
  41. for item in trans_info[::-1]:
  42. if item[0][0] == 'resize':
  43. h, w = item[1][0], item[1][1]
  44. img = F.interpolate(img, [h, w], mode='bilinear')
  45. elif item[0][0] == 'padding':
  46. h, w = item[1][0], item[1][1]
  47. img = img[:, :, 0:h, 0:w]
  48. else:
  49. raise Exception("Unexpected info '{}' in im_info".format(item[0]))
  50. return img
  51. def postprocess(fg, alpha, img, bg, trans_info, writer, fg_estimate):
  52. """
  53. Postprocess for prediction results.
  54. Args:
  55. fg (Tensor): The foreground, value should be in [0, 1].
  56. alpha (Tensor): The alpha, value should be in [0, 1].
  57. img (Tensor): The original image, value should be in [0, 1].
  58. trans_info (list): A list of the shape transformations.
  59. writers (dict): A dict of VideoWriter instance.
  60. fg_estimate (bool): Whether to estimate foreground. It is invalid when fg is not None.
  61. """
  62. alpha = reverse_transform(alpha, trans_info)
  63. bg = F.interpolate(bg, size=alpha.shape[-2:], mode='bilinear')
  64. if fg is None:
  65. if fg_estimate:
  66. img = img.transpose((0, 2, 3, 1)).squeeze().numpy()
  67. alpha = alpha.squeeze().numpy()
  68. fg = estimate_foreground_ml(img, alpha)
  69. bg = bg.transpose((0, 2, 3, 1)).squeeze().numpy()
  70. else:
  71. fg = img
  72. else:
  73. fg = reverse_transform(fg, trans_info)
  74. if len(alpha.shape) == 2:
  75. alpha = alpha[:, :, None]
  76. new_img = alpha * fg + (1 - alpha) * bg
  77. writer.write(new_img)
  78. def get_bg(bg_path, shape):
  79. bg = paddle.zeros((1, 3, shape[0], shape[1]))
  80. # special color
  81. if bg_path == 'r':
  82. bg[:, 2, :, :] = 1
  83. elif bg_path == 'g':
  84. bg[:, 1, :, :] = 1
  85. elif bg_path == 'b':
  86. bg[:, 0, :, :] = 1
  87. elif bg_path == 'w':
  88. bg = bg + 1
  89. elif not os.path.exists(bg_path):
  90. raise Exception('The background path is not found: {}'.format(bg_path))
  91. # image
  92. elif bg_path.endswith(
  93. ('.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png')):
  94. bg = cv2.imread(bg_path)
  95. bg = bg[np.newaxis, :, :, :]
  96. bg = paddle.to_tensor(bg) / 255.
  97. bg = bg.transpose((0, 3, 1, 2))
  98. elif bg_path.lower().endswith(
  99. ('.mp4', '.avi', '.mov', '.m4v', '.dat', '.rm', '.rmvb', '.wmv', '.asf',
  100. '.asx', '.3gp', '.mkv', '.flv', '.vob')):
  101. transforms = T.Compose([T.Normalize(mean=(0, 0, 0), std=(1, 1, 1))])
  102. bg = VideoReader(bg_path, transforms=transforms)
  103. bg = paddle.io.DataLoader(bg)
  104. bg = iter(bg)
  105. else:
  106. raise IOError('The background path is invalid, please check it')
  107. return bg
  108. def bg_replace_video(model,
  109. model_path,
  110. transforms,
  111. video_path,
  112. bg_path='g',
  113. save_dir='output',
  114. fg_estimate=True):
  115. """
  116. predict and visualize the video.
  117. Args:
  118. model (nn.Layer): Used to predict for input video.
  119. model_path (str): The path of pretrained model.
  120. transforms (transforms.Compose): Preprocess for frames of video.
  121. video_path (str): The video path to be predicted.
  122. bg_path (str): The background. It can be image path or video path or a string of (r,g,b,w). Default: 'g'.
  123. save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
  124. fg_estimate (bool, optional): Whether to estimate foreground when predicting. It is invalid if the foreground is predicted by model. Default: True
  125. """
  126. utils.utils.load_entire_model(model, model_path)
  127. model.eval()
  128. # Build loader and writer for video
  129. loader, writer = build_loader_writter(
  130. video_path, transforms, save_dir=save_dir)
  131. # Get bg
  132. bg_reader = get_bg(
  133. bg_path, shape=(loader.dataset.height, loader.dataset.width))
  134. logger.info("Start to predict...")
  135. progbar_pred = progbar.Progbar(target=len(loader), verbose=1)
  136. preprocess_cost_averager = TimeAverager()
  137. infer_cost_averager = TimeAverager()
  138. postprocess_cost_averager = TimeAverager()
  139. batch_start = time.time()
  140. with paddle.no_grad():
  141. for i, data in enumerate(loader):
  142. preprocess_cost_averager.record(time.time() - batch_start)
  143. infer_start = time.time()
  144. result = model(data) # result maybe a Tensor or a dict
  145. if isinstance(result, paddle.Tensor):
  146. alpha = result
  147. fg = None
  148. else:
  149. alpha = result['alpha']
  150. fg = result.get('fg', None)
  151. infer_cost_averager.record(time.time() - infer_start)
  152. # postprocess
  153. postprocess_start = time.time()
  154. if isinstance(bg_reader, Iterable):
  155. try:
  156. bg = next(bg_reader)
  157. except StopIteration:
  158. bg_reader = get_bg(
  159. bg_path,
  160. shape=(loader.dataset.height, loader.dataset.width))
  161. bg = next(bg_reader)
  162. finally:
  163. bg = bg['ori_img']
  164. else:
  165. bg = bg_reader
  166. postprocess(
  167. fg,
  168. alpha,
  169. data['ori_img'],
  170. bg=bg,
  171. trans_info=data['trans_info'],
  172. writer=writer,
  173. fg_estimate=fg_estimate)
  174. postprocess_cost_averager.record(time.time() - postprocess_start)
  175. preprocess_cost = preprocess_cost_averager.get_average()
  176. infer_cost = infer_cost_averager.get_average()
  177. postprocess_cost = postprocess_cost_averager.get_average()
  178. progbar_pred.update(i + 1, [('preprocess_cost', preprocess_cost),
  179. ('infer_cost cost', infer_cost),
  180. ('postprocess_cost', postprocess_cost)])
  181. preprocess_cost_averager.reset()
  182. infer_cost_averager.reset()
  183. postprocess_cost_averager.reset()
  184. batch_start = time.time()
  185. if hasattr(model, 'reset'):
  186. model.reset()
  187. loader.dataset.release()
  188. if isinstance(bg, VideoReader):
  189. bg_reader.release()