303 lines
11 KiB
Python
303 lines
11 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
视频预处理脚本 - 将MP4视频转换为224x224帧图像
|
||
支持多线程并发处理、进度条显示和中断恢复功能
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import json
|
||
import argparse
|
||
import subprocess
|
||
import threading
|
||
from pathlib import Path
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
from tqdm import tqdm
|
||
import time
|
||
from typing import List, Dict, Optional
|
||
|
||
|
||
class VideoPreprocessor:
|
||
"""视频预处理器,支持多线程和中断恢复"""
|
||
|
||
def __init__(self,
|
||
input_dir: str,
|
||
output_dir: str,
|
||
frame_size: int = 224,
|
||
fps: int = 30,
|
||
num_workers: int = 4,
|
||
quality: int = 2,
|
||
resume: bool = True):
|
||
"""
|
||
初始化预处理器
|
||
|
||
Args:
|
||
input_dir: 输入视频目录
|
||
output_dir: 输出帧目录
|
||
frame_size: 帧大小(正方形)
|
||
fps: 提取帧率
|
||
num_workers: 并发工作线程数
|
||
quality: JPEG质量 (1-31, 数值越小质量越高)
|
||
resume: 是否启用中断恢复
|
||
"""
|
||
self.input_dir = Path(input_dir)
|
||
self.output_dir = Path(output_dir)
|
||
self.frame_size = frame_size
|
||
self.fps = fps
|
||
self.num_workers = num_workers
|
||
self.quality = quality
|
||
self.resume = resume
|
||
|
||
# 状态文件路径
|
||
self.state_file = self.output_dir / ".preprocessing_state.json"
|
||
|
||
# 创建输出目录
|
||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
# 初始化状态
|
||
self.state = self._load_state()
|
||
|
||
# 收集所有视频文件
|
||
self.video_files = self._collect_video_files()
|
||
|
||
def _load_state(self) -> Dict:
|
||
"""加载处理状态"""
|
||
if self.resume and self.state_file.exists():
|
||
try:
|
||
with open(self.state_file, 'r') as f:
|
||
return json.load(f)
|
||
except (json.JSONDecodeError, IOError):
|
||
print(f"警告: 无法读取状态文件,将重新开始处理")
|
||
|
||
return {
|
||
"completed": [],
|
||
"failed": [],
|
||
"total_processed": 0,
|
||
"start_time": None,
|
||
"last_update": None
|
||
}
|
||
|
||
def _save_state(self):
|
||
"""保存处理状态"""
|
||
self.state["last_update"] = time.time()
|
||
try:
|
||
with open(self.state_file, 'w') as f:
|
||
json.dump(self.state, f, indent=2)
|
||
except IOError as e:
|
||
print(f"警告: 无法保存状态文件: {e}")
|
||
|
||
def _collect_video_files(self) -> List[Path]:
|
||
"""收集所有需要处理的视频文件"""
|
||
video_files = []
|
||
for file_path in self.input_dir.glob("*.mp4"):
|
||
if file_path.name not in self.state["completed"]:
|
||
video_files.append(file_path)
|
||
|
||
return sorted(video_files)
|
||
|
||
def _parse_video_name(self, video_path: Path) -> Dict[str, str]:
|
||
"""解析视频文件名,使用完整文件名作为ID"""
|
||
name_without_ext = video_path.stem
|
||
|
||
# 直接使用完整文件名作为ID,确保每个mp4文件有独立的输出目录
|
||
return {
|
||
"video_id": name_without_ext,
|
||
"start_frame": "unknown",
|
||
"end_frame": "unknown",
|
||
"full_name": name_without_ext
|
||
}
|
||
|
||
def _extract_frames(self, video_path: Path) -> bool:
|
||
"""提取单个视频的帧"""
|
||
try:
|
||
# 解析视频名称
|
||
video_info = self._parse_video_name(video_path)
|
||
output_subdir = self.output_dir / video_info["video_id"]
|
||
output_subdir.mkdir(exist_ok=True)
|
||
|
||
# 构建FFmpeg命令
|
||
output_pattern = output_subdir / "frame_%04d.jpg"
|
||
|
||
cmd = [
|
||
"ffmpeg",
|
||
"-i", str(video_path),
|
||
"-vf", f"fps={self.fps},scale={self.frame_size}:{self.frame_size}",
|
||
"-q:v", str(self.quality),
|
||
"-y", # 覆盖输出文件
|
||
str(output_pattern)
|
||
]
|
||
|
||
# 执行FFmpeg命令
|
||
result = subprocess.run(
|
||
cmd,
|
||
capture_output=True,
|
||
text=True,
|
||
timeout=300 # 5分钟超时
|
||
)
|
||
|
||
if result.returncode != 0:
|
||
print(f"FFmpeg错误处理 {video_path.name}: {result.stderr}")
|
||
return False
|
||
|
||
# 验证输出帧数量
|
||
output_frames = list(output_subdir.glob("frame_*.jpg"))
|
||
if len(output_frames) == 0:
|
||
print(f"警告: {video_path.name} 没有生成任何帧")
|
||
return False
|
||
|
||
return True
|
||
|
||
except subprocess.TimeoutExpired:
|
||
print(f"超时处理 {video_path.name}")
|
||
return False
|
||
except Exception as e:
|
||
print(f"处理 {video_path.name} 时发生错误: {e}")
|
||
return False
|
||
|
||
def _process_video(self, video_path: Path) -> tuple[bool, str]:
|
||
"""处理单个视频文件"""
|
||
video_name = video_path.name
|
||
|
||
try:
|
||
success = self._extract_frames(video_path)
|
||
|
||
if success:
|
||
self.state["completed"].append(video_name)
|
||
if video_name in self.state["failed"]:
|
||
self.state["failed"].remove(video_name)
|
||
return True, video_name
|
||
else:
|
||
self.state["failed"].append(video_name)
|
||
return False, video_name
|
||
|
||
except Exception as e:
|
||
print(f"处理 {video_name} 时发生异常: {e}")
|
||
self.state["failed"].append(video_name)
|
||
return False, video_name
|
||
|
||
def process_all_videos(self):
|
||
"""处理所有视频文件"""
|
||
if not self.video_files:
|
||
print("没有找到需要处理的视频文件")
|
||
return
|
||
|
||
print(f"找到 {len(self.video_files)} 个待处理视频文件")
|
||
print(f"输出目录: {self.output_dir}")
|
||
print(f"帧大小: {self.frame_size}x{self.frame_size}")
|
||
print(f"帧率: {self.fps} fps")
|
||
print(f"并发线程数: {self.num_workers}")
|
||
|
||
if self.state["completed"]:
|
||
print(f"跳过 {len(self.state['completed'])} 个已处理的视频")
|
||
|
||
# 记录开始时间
|
||
if self.state["start_time"] is None:
|
||
self.state["start_time"] = time.time()
|
||
|
||
# 创建进度条
|
||
with tqdm(total=len(self.video_files), desc="处理视频", unit="个") as pbar:
|
||
with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
|
||
# 提交所有任务
|
||
future_to_video = {
|
||
executor.submit(self._process_video, video_path): video_path
|
||
for video_path in self.video_files
|
||
}
|
||
|
||
# 处理完成的任务
|
||
for future in as_completed(future_to_video):
|
||
video_path = future_to_video[future]
|
||
try:
|
||
success, video_name = future.result()
|
||
if success:
|
||
pbar.set_postfix({"状态": "成功", "文件": video_name[:20]})
|
||
else:
|
||
pbar.set_postfix({"状态": "失败", "文件": video_name[:20]})
|
||
except Exception as e:
|
||
print(f"处理 {video_path.name} 时发生异常: {e}")
|
||
pbar.set_postfix({"状态": "异常", "文件": video_path.name[:20]})
|
||
|
||
pbar.update(1)
|
||
self.state["total_processed"] += 1
|
||
|
||
# 定期保存状态
|
||
if self.state["total_processed"] % 5 == 0:
|
||
self._save_state()
|
||
|
||
# 最终保存状态
|
||
self._save_state()
|
||
|
||
# 打印处理结果
|
||
self._print_summary()
|
||
|
||
def _print_summary(self):
|
||
"""打印处理摘要"""
|
||
print("\n" + "="*50)
|
||
print("处理完成摘要:")
|
||
print(f"总处理视频数: {len(self.state['completed'])}")
|
||
print(f"失败视频数: {len(self.state['failed'])}")
|
||
|
||
if self.state["failed"]:
|
||
print("\n失败的视频:")
|
||
for video_name in self.state["failed"]:
|
||
print(f" - {video_name}")
|
||
|
||
if self.state["start_time"]:
|
||
elapsed_time = time.time() - self.state["start_time"]
|
||
print(f"\n总耗时: {elapsed_time:.2f} 秒")
|
||
if self.state["total_processed"] > 0:
|
||
avg_time = elapsed_time / self.state["total_processed"]
|
||
print(f"平均每个视频: {avg_time:.2f} 秒")
|
||
|
||
print("="*50)
|
||
|
||
|
||
def main():
|
||
"""主函数"""
|
||
parser = argparse.ArgumentParser(description="视频预处理脚本")
|
||
parser.add_argument("--input_dir", type=str, default="/home/hexone/Workplace/ws_asmo/vhead/sekai-real-drone/sekai-real-drone", help="输入视频目录")
|
||
parser.add_argument("--output_dir", type=str, default="/home/hexone/Workplace/ws_asmo/vhead/sekai-real-drone/processed", help="输出帧目录")
|
||
parser.add_argument("--size", type=int, default=224, help="帧大小 (默认: 224)")
|
||
parser.add_argument("--fps", type=int, default=10, help="提取帧率 (默认: 30)")
|
||
parser.add_argument("--workers", type=int, default=32, help="并发线程数 (默认: 4)")
|
||
parser.add_argument("--quality", type=int, default=2, help="JPEG质量 1-31 (默认: 2)")
|
||
parser.add_argument("--no-resume", action="store_true", help="不启用中断恢复")
|
||
|
||
args = parser.parse_args()
|
||
|
||
# 检查输入目录
|
||
if not Path(args.input_dir).exists():
|
||
print(f"错误: 输入目录不存在: {args.input_dir}")
|
||
sys.exit(1)
|
||
|
||
# 检查FFmpeg是否可用
|
||
try:
|
||
subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
|
||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||
print("错误: FFmpeg未安装或不在PATH中")
|
||
sys.exit(1)
|
||
|
||
# 创建预处理器并开始处理
|
||
preprocessor = VideoPreprocessor(
|
||
input_dir=args.input_dir,
|
||
output_dir=args.output_dir,
|
||
frame_size=args.size,
|
||
fps=args.fps,
|
||
num_workers=args.workers,
|
||
quality=args.quality,
|
||
resume=not args.no_resume
|
||
)
|
||
|
||
try:
|
||
preprocessor.process_all_videos()
|
||
except KeyboardInterrupt:
|
||
print("\n\n用户中断处理,状态已保存")
|
||
preprocessor._save_state()
|
||
print("可以使用相同命令恢复处理")
|
||
except Exception as e:
|
||
print(f"\n处理过程中发生错误: {e}")
|
||
preprocessor._save_state()
|
||
sys.exit(1)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main() |