123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import os
- import math
- import time
- import cv2
- import numpy as np
- import paddle
- import paddle.nn.functional as F
- from paddleseg import utils
- from paddleseg.core import infer
- from paddleseg.utils import logger, progbar, TimeAverager
- from ppmatting.utils import mkdir, estimate_foreground_ml, VideoReader, VideoWriter
def build_loader_writter(video_path, transforms, save_dir):
    """
    Create the frame loader and the output writers for a single video.

    The output files are named after the input video: for an input whose
    base name (without extension) is ``name``, the alpha video is written to
    ``<save_dir>/name_alpha.avi`` and the foreground video to
    ``<save_dir>/name_fg.avi``. Both writers reuse the fps and the
    (width, height) reported by the reader.

    Args:
        video_path (str): The path of the input video.
        transforms: Preprocess handed to VideoReader together with the path.
        save_dir (str): The directory the two output videos are placed in.

    Returns:
        tuple: ``(loader, writers)``. ``loader`` is a paddle.io.DataLoader
            wrapping the VideoReader; ``writers`` is a dict with the keys
            'alpha' (is_color=False) and 'fg' (is_color=True).
    """
    reader = VideoReader(video_path, transforms)
    loader = paddle.io.DataLoader(reader)

    stem, _ = os.path.splitext(os.path.basename(video_path))

    # (dict key, file-name suffix, is_color) for every stream that is saved.
    stream_specs = (
        ('alpha', '_alpha.avi', False),
        ('fg', '_fg.avi', True),
    )
    writers = {
        key: VideoWriter(
            os.path.join(save_dir, stem + suffix),
            reader.fps,
            frame_size=(reader.width, reader.height),
            is_color=colored)
        for key, suffix, colored in stream_specs
    }
    return loader, writers
def reverse_transform(img, trans_info):
    """
    Recover a prediction to its original shape.

    The recorded transformations are undone from the last one to the first.
    Each entry is indexed as ``entry[0][0]`` for the operation name and
    ``entry[1][0]``, ``entry[1][1]`` for the height and width to restore.

    Args:
        img: The prediction; it is indexed as a 4-D array whose last two
            axes are height and width.
        trans_info (list): A list of the shape transformations.

    Returns:
        The prediction after every recorded transformation has been undone.

    Raises:
        Exception: If an entry is neither 'resize' nor 'padding'.
    """
    for step in reversed(trans_info):
        op = step[0][0]
        if op == 'resize':
            # Interpolate back to the size the frame had before resizing.
            target = [step[1][0], step[1][1]]
            img = F.interpolate(img, target, mode='bilinear')
            continue
        if op == 'padding':
            # Crop back to the pre-padding height and width.
            img = img[:, :, 0:step[1][0], 0:step[1][1]]
            continue
        raise Exception("Unexpected info '{}' in im_info".format(step[0]))
    return img
def postprocess(fg, alpha, img, trans_info, writers, fg_estimate):
    """
    Postprocess the prediction of one frame and write it to the videos.

    The alpha is first recovered to the original shape. The foreground is
    then taken from one of three sources: the model prediction (recovered
    to the original shape as well), an estimate computed by
    estimate_foreground_ml from the original image and the alpha, or the
    original image itself. The foreground is multiplied by the alpha before
    it is written.

    Args:
        fg (Tensor): The foreground, value should be in [0, 1].
        alpha (Tensor): The alpha, value should be in [0, 1].
        img (Tensor): The original image, value should be in [0, 1].
        trans_info (list): A list of the shape transformations.
        writers (dict): A dict of VideoWriter instance, with the keys
            'alpha' and 'fg'.
        fg_estimate (bool): Whether to estimate foreground. It is invalid when fg is not None.
    """
    alpha = reverse_transform(alpha, trans_info)

    if fg is not None:
        # The model predicted a foreground: only its shape has to be restored.
        fg = reverse_transform(fg, trans_info)
    elif fg_estimate:
        # The estimation runs on numpy arrays; the image is moved to a
        # channel-last layout and singleton axes are dropped beforehand.
        img = img.transpose((0, 2, 3, 1)).squeeze().numpy()
        alpha = alpha.squeeze().numpy()
        fg = estimate_foreground_ml(img, alpha)
    else:
        fg = img

    # A 2-D alpha needs a trailing axis to broadcast against the foreground.
    matte = alpha[:, :, None] if len(alpha.shape) == 2 else alpha
    fg = matte * fg

    writers['alpha'].write(alpha)
    writers['fg'].write(fg)
def predict_video(model,
                  model_path,
                  transforms,
                  video_path,
                  save_dir='output',
                  fg_estimate=True):
    """
    predict and visualize the video.

    Every frame yielded by the loader is passed through the model, the result
    is postprocessed and appended to the alpha and foreground videos in
    save_dir. The video reader and both writers are released when the
    function exits, including when a frame fails with an exception, so the
    files written so far are closed properly.

    Args:
        model (nn.Layer): Used to predict for input video.
        model_path (str): The path of pretrained model.
        transforms (transforms.Compose): Preprocess for frames of video.
        video_path (str): the video path to be predicted.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        fg_estimate (bool, optional): Whether to estimate foreground when predicting. It is invalid if the foreground is predicted by model. Default: True
    """
    utils.utils.load_entire_model(model, model_path)
    model.eval()

    # Build loader and writer for video
    loader, writers = build_loader_writter(
        video_path, transforms, save_dir=save_dir)

    # From here on the reader and the writers hold open resources, so all
    # remaining work runs under try/finally to guarantee their release.
    try:
        logger.info("Start to predict...")
        progbar_pred = progbar.Progbar(target=len(loader), verbose=1)
        preprocess_cost_averager = TimeAverager()
        infer_cost_averager = TimeAverager()
        postprocess_cost_averager = TimeAverager()
        batch_start = time.time()
        with paddle.no_grad():
            for i, data in enumerate(loader):
                # Time spent waiting for the loader counts as preprocessing.
                preprocess_cost_averager.record(time.time() - batch_start)

                infer_start = time.time()
                result = model(data)  # result maybe a Tensor or a dict
                if isinstance(result, paddle.Tensor):
                    alpha = result
                    fg = None
                else:
                    alpha = result['alpha']
                    fg = result.get('fg', None)
                infer_cost_averager.record(time.time() - infer_start)

                postprocess_start = time.time()
                postprocess(
                    fg,
                    alpha,
                    data['ori_img'],
                    trans_info=data['trans_info'],
                    writers=writers,
                    fg_estimate=fg_estimate)
                postprocess_cost_averager.record(time.time() -
                                                 postprocess_start)

                preprocess_cost = preprocess_cost_averager.get_average()
                infer_cost = infer_cost_averager.get_average()
                postprocess_cost = postprocess_cost_averager.get_average()
                progbar_pred.update(i + 1,
                                    [('preprocess_cost', preprocess_cost),
                                     ('infer_cost', infer_cost),
                                     ('postprocess_cost', postprocess_cost)])

                # The averagers are cleared after every frame, so the values
                # reported above are those of the current frame.
                preprocess_cost_averager.reset()
                infer_cost_averager.reset()
                postprocess_cost_averager.reset()
                batch_start = time.time()
    finally:
        # Close the input video and finalize the output files first, then
        # clear whatever state the model exposes through reset().
        loader.dataset.release()
        for writer in writers.values():
            writer.release()
        if hasattr(model, 'reset'):
            model.reset()
|