Deploy from the source code so the model can be called conveniently through an interface.
Environment: Python 3.7 + CUDA 10.1 + NVIDIA driver 418.87
1. Installation
torch==1.8.1+cu101
torchvision==0.9.1+cu101
tokenizers==0.11.6
transformers==4.20.0
huggingface-hub==0.2.0
timm
ftfy
sentencepiece
open_clip==2.16.0 (build it yourself: download the source from GitHub, install with python setup.py install --user, and relax the pytorch pin in its requirements to >1.8)
opencv-python
pytorch_lightning
Update libstdc++.so.6 so that the 1.3.9 ABI version (CXXABI_1.3.9) is available
Update glibc to 2.18
addict
yapf
prettytable
omegaconf==2.1.1
xformers==0.0.2
Change the OpenAI CLIP path at line 88 of ldm/modules/encoders/modules.py so it points at a locally available copy of the model; a sketch of staging such a copy follows.
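A minimal sketch of staging the weights locally, assuming the stock FrozenCLIPEmbedder in that file loads tokenizer and text model from a single `version` string via transformers; the target directory /data/models/clip-vit-large-patch14 is a hypothetical example, not part of the original setup:

# One-off helper: download OpenAI's CLIP once, so the path at modules.py line 88
# can point to the local directory instead of "openai/clip-vit-large-patch14".
from transformers import CLIPTokenizer, CLIPTextModel

local_dir = "/data/models/clip-vit-large-patch14"   # hypothetical local location
CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14").save_pretrained(local_dir)
CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14").save_pretrained(local_dir)
# then change version="openai/clip-vit-large-patch14" to version=local_dir in FrozenCLIPEmbedder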
A few places use autocast in ways that conflict with PyTorch 1.8; removing autocast from the inference path in all of them is enough.
In ldm/modules/diffusionmodules/util.py, line 126, comment out the autocast kwargs that PyTorch 1.8 does not provide (torch.get_autocast_gpu_dtype and torch.is_autocast_cache_enabled), keeping only enabled:

ctx.gpu_autocast_kwargs = {"enabled": torch.is_autocast_enabled(),
                           # "dtype": torch.get_autocast_gpu_dtype(),
                           # "cache_enabled": torch.is_autocast_cache_enabled()
                           }
In ldm/modules/attention.py, line 175, simply comment the autocast block out; a sketch of the patch is below.
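For reference, the block around that line in ControlNet's CrossAttention.forward looks roughly like the fp32-precision branch sketched below (reconstructed from memory, so verify against your copy before editing); torch.autocast with a device_type argument does not exist on PyTorch 1.8, and keeping only the plain einsum is enough.

# Approximate sketch of the patch in CrossAttention.forward:
# if _ATTN_PRECISION == "fp32":
#     with torch.autocast(enabled=False, device_type='cuda'):   # not available on torch 1.8
#         q, k = q.float(), k.float()
#         sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
# else:
#     sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
sim = einsum('b i d, b j d -> b i j', q, k) * self.scale   # keep only this line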
from share import *
import config

import cv2
import einops
# import gradio as gr
import numpy as np
import torch
import random
import time
from PIL import Image
from pytorch_lightning import seed_everything

from annotator.util import resize_image, HWC3
from annotator.uniformer import UniformerDetector
from cldm.model import create_model, load_state_dict
from cldm.ddim_hacked import DDIMSampler

apply_uniformer = UniformerDetector()

model = create_model('./models/cldm_v15.yaml').cpu()
model.load_state_dict(load_state_dict('./models/control_sd15_seg.pth', location='cuda'))
model = model.cuda()
ddim_sampler = DDIMSampler(model)


def process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, detect_resolution,
            ddim_steps, guess_mode, strength, scale, seed, eta):
    with torch.no_grad():
        input_image = HWC3(input_image)
        detected_map = apply_uniformer(resize_image(input_image, detect_resolution))
        img = resize_image(input_image, image_resolution)
        H, W, C = img.shape

        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)

        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
        control = torch.stack([control for _ in range(num_samples)], dim=0)
        control = einops.rearrange(control, 'b h w c -> b c h w').clone()

        if seed == -1:
            seed = random.randint(0, 65535)
        seed_everything(seed)

        if config.save_memory:
            model.low_vram_shift(is_diffusing=False)

        cond = {"c_concat": [control],
                "c_crossattn": [model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)]}
        un_cond = {"c_concat": None if guess_mode else [control],
                   "c_crossattn": [model.get_learned_conditioning([n_prompt] * num_samples)]}
        shape = (4, H // 8, W // 8)

        if config.save_memory:
            model.low_vram_shift(is_diffusing=True)

        model.control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13)  # Magic number. IDK why. Perhaps because 0.825**12<0.01 but 0.826**12>0.01

        samples, intermediates = ddim_sampler.sample(ddim_steps, num_samples,
                                                     shape, cond, verbose=False, eta=eta,
                                                     unconditional_guidance_scale=scale,
                                                     unconditional_conditioning=un_cond)

        if config.save_memory:
            model.low_vram_shift(is_diffusing=False)

        x_samples = model.decode_first_stage(samples)
        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

        results = [x_samples[i] for i in range(num_samples)]
    return [detected_map] + results


# Modern style -- 现代风格
# European style -- 欧式风格
# Chinese style -- 中式风格
# Mediterranean style -- 地中海风格
# Industrial style -- 工业风格
# Nordic style -- 北欧风格
# Bohemian style -- 波西米亚风格
# living room
# Dining room
# office
# bedroom
# bathroom
# gaming room

img = "room.png"
prompt = "Modern style "
a_prompt = "best quality, extremely detailed"
n_prompt = "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality"
num_samples = 1
image_resolution = 512
detect_resolution = 512 # 512
ddim_steps = 20
guess_mode = False
strength = 1.0
scale = 9.0
seed = 2023
eta = 0.0
input_image = np.array(Image.open(img))
start = time.time()
for i, prompt in enumerate(["Modern style", "European style", "Chinese style", "Mediterranean style", "Industrial style",
                            "Nordic style", "Bohemian style", "Italian style", "French style", "Cream style",
                            "wood style", "Romanesque style", "Japanese style", "American style", "Metal style",
                            "pastoral style", "country style", "Luxury style", "Southeast Asian style",
                            "Postmodern style", "National style"]):
    image = process(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, detect_resolution,
                    ddim_steps, guess_mode, strength, scale, seed, eta)
    cv2.imwrite(f"out_seg_{prompt}.png", image[0])
    cv2.imwrite(f"out_{prompt}.png", image[1][:, :, ::-1])
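Since the point of the source deployment is to call this through an interface, process() can be wrapped in a small HTTP service. Below is a minimal sketch using Flask; Flask itself, the /seg2img route, and the JSON field names are illustrative assumptions and not part of the setup above. It is meant to be appended to the script so that process() and the default parameters are already in scope.

# Minimal HTTP wrapper around process(). Flask, the route, and the JSON fields
# are assumptions for illustration; append to the script above so that
# process(), a_prompt, n_prompt, etc. are in scope.
import io
import base64
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/seg2img", methods=["POST"])
def seg2img():
    data = request.get_json()
    # the client sends a base64-encoded image and a style prompt
    img = np.array(Image.open(io.BytesIO(base64.b64decode(data["image"]))))
    outs = process(img, data.get("prompt", "Modern style"), a_prompt, n_prompt,
                   num_samples, image_resolution, detect_resolution,
                   ddim_steps, guess_mode, strength, scale, seed, eta)
    ok, buf = cv2.imencode(".png", outs[1][:, :, ::-1])  # outs[0] is the seg map
    return jsonify({"image": base64.b64encode(buf.tobytes()).decode()})

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)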
2. Updating the seg inference to OneFormer
hydra-core==1.1.2
detectron2: download the detectron2 source from GitHub and build it yourself: python -m pip install -e detectron2
natten==0.14.1
Build OneFormer's extension module: in controlnet/annotator/OneFormer/oneformer/modeling/pixel_decoder/ops, run python setup.py install --user (a quick import check is shown after this list).
wandb
diffdist
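A quick sanity check that the extension built correctly, assuming the ops package installs a CUDA module named MultiScaleDeformableAttention (the name used by the Deformable-DETR / Mask2Former lineage these ops come from; verify against the setup.py if it differs):

# Sanity check for the compiled deformable-attention extension.
# The module name is an assumption based on the upstream ops package.
import torch
import MultiScaleDeformableAttention as MSDA

print(torch.cuda.is_available())
print(MSDA.__file__)   # should point at the freshly built .so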
import torch
# print("Installed the dependencies!")
import numpy as np
from PIL import Image
import cv2

from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.data import MetadataCatalog

from annotator.OneFormer.oneformer import (
    add_oneformer_config,
    add_common_config,
    add_swin_config,
    add_dinat_config,
)
from annotator.OneFormer.demo.defaults import DefaultPredictor
from annotator.OneFormer.demo.visualizer import Visualizer, ColorMode
# import gradio as gr
from huggingface_hub import hf_hub_download

KEY_DICT = {"Cityscapes (19 classes)": "cityscapes",
            "COCO (133 classes)": "coco",
            "ADE20K (150 classes)": "ade20k", }

SWIN_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_swin_large_IN21k_384_bs16_90k.yaml",
                 "coco": "configs/coco/oneformer_swin_large_IN21k_384_bs16_100ep.yaml",
                 "ade20k": "configs/ade20k/oneformer_swin_large_IN21k_384_bs16_160k.yaml", }
SWIN_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_swin_large",
                                                 filename="250_16_swin_l_oneformer_cityscapes_90k.pth"),
                   "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_swin_large",
                                           filename="150_16_swin_l_oneformer_coco_100ep.pth"),
                   "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_swin_large",
                                             filename="250_16_swin_l_oneformer_ade20k_160k.pth")}

DINAT_CFG_DICT = {"cityscapes": "configs/cityscapes/oneformer_dinat_large_bs16_90k.yaml",
                  "coco": "configs/coco/oneformer_dinat_large_bs16_100ep.yaml",
                  "ade20k": "configs/ade20k/oneformer_dinat_large_IN21k_384_bs16_160k.yaml", }
DINAT_MODEL_DICT = {"cityscapes": hf_hub_download(repo_id="shi-labs/oneformer_cityscapes_dinat_large",
                                                  filename="250_16_dinat_l_oneformer_cityscapes_90k.pth"),
                    "coco": hf_hub_download(repo_id="shi-labs/oneformer_coco_dinat_large",
                                            filename="150_16_dinat_l_oneformer_coco_100ep.pth"),
                    "ade20k": hf_hub_download(repo_id="shi-labs/oneformer_ade20k_dinat_large",
                                              filename="250_16_dinat_l_oneformer_ade20k_160k.pth")}

MODEL_DICT = {"DiNAT-L": DINAT_MODEL_DICT, "Swin-L": SWIN_MODEL_DICT}
CFG_DICT = {"DiNAT-L": DINAT_CFG_DICT, "Swin-L": SWIN_CFG_DICT}
WIDTH_DICT = {"cityscapes": 512, "coco": 512, "ade20k": 640}

cpu_device = torch.device("cpu")

PREDICTORS = {
    "DiNAT-L": {"Cityscapes (19 classes)": None, "COCO (133 classes)": None, "ADE20K (150 classes)": None},
    "Swin-L": {"Cityscapes (19 classes)": None, "COCO (133 classes)": None, "ADE20K (150 classes)": None},
}
METADATA = {
    "DiNAT-L": {"Cityscapes (19 classes)": None, "COCO (133 classes)": None, "ADE20K (150 classes)": None},
    "Swin-L": {"Cityscapes (19 classes)": None, "COCO (133 classes)": None, "ADE20K (150 classes)": None},
}


def setup_modules():
    # build one predictor + metadata entry per (backbone, dataset) pair
    for dataset in ["Cityscapes (19 classes)", "COCO (133 classes)", "ADE20K (150 classes)"]:
        for backbone in ["DiNAT-L", "Swin-L"]:
            cfg = setup_cfg(dataset, backbone)
            metadata = MetadataCatalog.get(
                cfg.DATASETS.TEST_PANOPTIC[0] if len(cfg.DATASETS.TEST_PANOPTIC) else "__unused")
            if 'cityscapes_fine_sem_seg_val' in cfg.DATASETS.TEST_PANOPTIC[0]:
                from cityscapesscripts.helpers.labels import labels
                stuff_colors = [k.color for k in labels if k.trainId != 255]
                metadata = metadata.set(stuff_colors=stuff_colors)
            PREDICTORS[backbone][dataset] = DefaultPredictor(cfg)
            METADATA[backbone][dataset] = metadata


def setup_cfg(dataset, backbone):
    # load config from file and command-line arguments
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_common_config(cfg)
    add_swin_config(cfg)
    add_oneformer_config(cfg)
    add_dinat_config(cfg)
    dataset = KEY_DICT[dataset]
    cfg_path = CFG_DICT[backbone][dataset]
    cfg.merge_from_file(cfg_path)
    if torch.cuda.is_available():
        cfg.MODEL.DEVICE = 'cuda'
    else:
        cfg.MODEL.DEVICE = 'cpu'
    cfg.MODEL.WEIGHTS = MODEL_DICT[backbone][dataset]
    cfg.freeze()
    return cfg


setup_modules()


class OneformerDetector:
    def __init__(self, ):
        backbone = "DiNAT-L"
        dataset = "ADE20K (150 classes)"
        self.predictor = PREDICTORS[backbone][dataset]
        self.metadata = METADATA[backbone][dataset]

    def __call__(self, img):
        visualizer = Visualizer(img[:, :, ::-1], metadata=self.metadata, instance_mode=ColorMode.IMAGE)
        predictions = self.predictor(img, "semantic")
        # blended overlay at alpha=0.5 (kept for debugging, not returned)
        out = visualizer.draw_sem_seg(predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=0.5)
        visualizer_map = Visualizer(img[:, :, ::-1], is_img=False, metadata=self.metadata, instance_mode=ColorMode.IMAGE)
        out_map = visualizer_map.draw_sem_seg(predictions["sem_seg"].argmax(dim=0).to(cpu_device), alpha=1, is_text=False)
        return out_map
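To plug this into the section 1 script, only the detector needs to be swapped. A minimal sketch, assuming the class above is saved as annotator/oneformer_seg.py (a hypothetical module path) and that draw_sem_seg returns a VisImage whose get_image() yields the HxWx3 uint8 map that process() expects:

# Swap OneFormer in for UniFormer in the section 1 script.
# 'annotator.oneformer_seg' is a hypothetical module path for the class above.
from annotator.oneformer_seg import OneformerDetector

oneformer = OneformerDetector()

def apply_uniformer(img):
    # keep the old name so process() works unchanged
    out_map = oneformer(img)
    return out_map.get_image()   # VisImage -> HxWx3 uint8 array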