#!/usr/bin/env python3
"""A script that converts Caffe2 models to ONNX and updates the ONNX model zoo.

Arguments:
    -v, verbose
    --local-dir, where the ONNX and Caffe2 models are stored
    --no-cache, ignore existing models in local-dir
    --clean-test-data, delete all existing test data when updating the ONNX model zoo
    --add-test-data, number of test data sets to add for each ONNX model
    --only-local, run locally (for testing purposes)

Examples:
    # Store the data in /home/username/zoo-dir, delete existing test data,
    # ignore the local cache, and generate 3 sets of new test data:
    python update-caffe2-models.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
"""

import argparse
import glob
import json
import os
import shutil
import tarfile
import tempfile
from urllib.request import urlretrieve
import boto3
import caffe2.python.onnx.backend
import caffe2.python.onnx.frontend
import caffe2.python.workspace as c2_workspace
import numpy as np
import onnx
import onnx.backend
from caffe2.proto import caffe2_pb2
from caffe2.python.models.download import (
deleteDirectory,
downloadFromURLToFile,
getURLFromName,
)
from onnx import numpy_helper
"""A script converting Caffe2 models to ONNX, and updating ONNX model zoos.
Arguments:
-v, verbose
--local-dir, where we store the ONNX and Caffe2 models
--no-cache, ignore existing models in local-dir
--clean-test-data, delete all the existing test data when updating ONNX model zoo
--add-test-data, add add-test-data sets of test data for each ONNX model
--only-local, run locally (for testing purpose)
Examples:
# store the data in /home/username/zoo-dir, delete existing test data, ignore local cache,
# and generate 3 sets of new test data
python update-caffe2-models.py --local-dir /home/username/zoo-dir --clean-test-data --no-cache --add-test-data 3
"""
# TODO: Add GPU support
def upload_onnx_model(model_name, zoo_dir, backup=False, only_local=False):
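    """Tar the model directory and upload it to the download.onnx S3 bucket.

    With backup=True the tarball gets a "-backup" suffix; in --only-local
    mode nothing is uploaded.
    """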
if only_local:
print("No uploading in local only mode.")
return
model_dir = os.path.join(zoo_dir, model_name)
suffix = "-backup" if backup else ""
if backup:
print(f"Backing up the previous version of ONNX model {model_name}...")
rel_file_name = f"{model_name}{suffix}.tar.gz"
abs_file_name = os.path.join(zoo_dir, rel_file_name)
print(f"Compressing {model_name} model to {abs_file_name}")
with tarfile.open(abs_file_name, "w:gz") as f:
f.add(model_dir, arcname=model_name)
file_size = os.stat(abs_file_name).st_size
print(
f"Uploading {abs_file_name} ({float(file_size) / 1024 / 1024} MB) to s3 cloud..."
)
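    # Upload through S3Transfer, which handles multipart uploads for large tarballs.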
client = boto3.client("s3", "us-east-1")
transfer = boto3.s3.transfer.S3Transfer(client)
transfer.upload_file(
abs_file_name,
"download.onnx",
f"models/latest/{rel_file_name}",
extra_args={"ACL": "public-read"},
)
print(f"Successfully uploaded {rel_file_name} to s3!")
def download_onnx_model(model_name, zoo_dir, use_cache=True, only_local=False):
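    """Fetch the ONNX model tarball from S3 and extract it into zoo_dir.

    An existing local copy is kept when use_cache is set; either way the
    current version is backed up to S3 unless running in --only-local mode.
    """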
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
return
else:
shutil.rmtree(model_dir)
url = f"https://s3.amazonaws.com/download.onnx/models/latest/{model_name}.tar.gz"
download_file = tempfile.NamedTemporaryFile(delete=False)
try:
download_file.close()
print(
f"Downloading ONNX model {model_name} from {url} and save in {download_file.name} ...\n"
)
urlretrieve(url, download_file.name)
with tarfile.open(download_file.name) as t:
print(f"Extracting ONNX model {model_name} to {zoo_dir} ...\n")
t.extractall(zoo_dir)
except Exception as e:
print(f"Failed to download/backup data for ONNX model {model_name}: {e}")
if not os.path.exists(model_dir):
os.makedirs(model_dir)
finally:
os.remove(download_file.name)
if not only_local:
upload_onnx_model(model_name, zoo_dir, backup=True, only_local=only_local)
def download_caffe2_model(model_name, zoo_dir, use_cache=True):
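    """Download predict_net.pb, init_net.pb, and value_info.json for a
    Caffe2 model into zoo_dir, reusing a cached copy when use_cache is set."""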
model_dir = os.path.join(zoo_dir, model_name)
if os.path.exists(model_dir):
if use_cache:
return
else:
shutil.rmtree(model_dir)
os.makedirs(model_dir)
for f in ["predict_net.pb", "init_net.pb", "value_info.json"]:
url = getURLFromName(model_name, f)
dest = os.path.join(model_dir, f)
try:
try:
downloadFromURLToFile(url, dest, show_progress=False)
except TypeError:
# show_progress not supported prior to
# Caffe2 78c014e752a374d905ecfb465d44fa16e02a28f1
# (Sep 17, 2017)
downloadFromURLToFile(url, dest)
except Exception as e:
print(f"Abort: {e}")
print("Cleaning up...")
deleteDirectory(model_dir)
raise
def caffe2_to_onnx(caffe2_model_name, caffe2_model_dir):
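    """Load a Caffe2 model's init/predict nets and value_info, and convert
    them to an ONNX model via the caffe2.python.onnx frontend."""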
caffe2_init_proto = caffe2_pb2.NetDef()
caffe2_predict_proto = caffe2_pb2.NetDef()
with open(os.path.join(caffe2_model_dir, "init_net.pb"), "rb") as f:
caffe2_init_proto.ParseFromString(f.read())
caffe2_init_proto.name = f"{caffe2_model_name}_init"
with open(os.path.join(caffe2_model_dir, "predict_net.pb"), "rb") as f:
caffe2_predict_proto.ParseFromString(f.read())
caffe2_predict_proto.name = caffe2_model_name
with open(os.path.join(caffe2_model_dir, "value_info.json"), "rb") as f:
        value_info = json.load(f)
print(
f"Converting Caffe2 model {caffe2_model_name} in {caffe2_model_dir} to ONNX format"
)
onnx_model = caffe2.python.onnx.frontend.caffe2_net_to_onnx_model(
init_net=caffe2_init_proto,
predict_net=caffe2_predict_proto,
value_info=value_info,
)
return onnx_model, caffe2_init_proto, caffe2_predict_proto
def tensortype_to_ndarray(tensor_type):
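    """Create a random ndarray matching the shape and element type of an
    ONNX TensorTypeProto; only FLOAT and INT32 tensors are supported."""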
    shape = [dim.dim_value for dim in tensor_type.shape.dim]
    if tensor_type.elem_type == onnx.TensorProto.FLOAT:
        dtype = np.float32
    elif tensor_type.elem_type == onnx.TensorProto.INT32:
        dtype = np.int32
    else:
        raise ValueError(
            f"Unsupported ONNX tensor element type: {tensor_type.elem_type}"
        )
    return np.random.rand(*shape).astype(dtype)
def generate_test_input_data(onnx_model, scale):
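    """Generate a random test tensor, scaled by `scale`, for each real graph
    input (graph inputs that are not initializers)."""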
real_inputs_names = list(
{input.name for input in onnx_model.graph.input}
- {init.name for init in onnx_model.graph.initializer}
)
real_inputs = []
for name in real_inputs_names:
for input in onnx_model.graph.input:
if name == input.name:
real_inputs.append(input)
test_inputs = []
for input in real_inputs:
ndarray = tensortype_to_ndarray(input.type.tensor_type)
test_inputs.append((input.name, ndarray * scale))
return test_inputs
def generate_test_output_data(caffe2_init_net, caffe2_predict_net, inputs):
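    """Run the Caffe2 predictor on the given (name, ndarray) inputs and
    return its outputs as the reference data."""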
p = c2_workspace.Predictor(caffe2_init_net, caffe2_predict_net)
inputs_map = {input[0]: input[1] for input in inputs}
output = p.run(inputs_map)
c2_workspace.ResetWorkspace()
return output
def onnx_verify(onnx_model, inputs, ref_outputs):
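    """Run the ONNX model through the Caffe2 backend and check its outputs
    against the reference outputs to 3 decimal places."""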
prepared = caffe2.python.onnx.backend.prepare(onnx_model)
onnx_inputs = []
for input in inputs:
if isinstance(input, tuple):
onnx_inputs.append(input[1])
else:
onnx_inputs.append(input)
onnx_outputs = prepared.run(inputs=onnx_inputs)
np.testing.assert_almost_equal(onnx_outputs, ref_outputs, decimal=3)
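# Mapping from ONNX model zoo names to their Caffe2 model zoo counterparts.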
model_mapping = {
"bvlc_alexnet": "bvlc_alexnet",
"bvlc_googlenet": "bvlc_googlenet",
"bvlc_reference_caffenet": "bvlc_reference_caffenet",
"bvlc_reference_rcnn_ilsvrc13": "bvlc_reference_rcnn_ilsvrc13",
"densenet121": "densenet121",
#'finetune_flickr_style': 'finetune_flickr_style',
"inception_v1": "inception_v1",
"inception_v2": "inception_v2",
"resnet50": "resnet50",
"shufflenet": "shufflenet",
"squeezenet": "squeezenet_old",
#'vgg16': 'vgg16',
"vgg19": "vgg19",
"zfnet512": "zfnet512",
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Update the ONNX models.")
parser.add_argument("-v", action="store_true", default=False, help="verbose")
parser.add_argument(
"--local-dir",
type=str,
default=os.path.expanduser("~"),
help="local dir to store Caffe2 and ONNX models",
)
parser.add_argument(
"--no-cache",
action="store_true",
default=False,
help="whether use local ONNX models",
)
parser.add_argument(
"--clean-test-data",
action="store_true",
default=False,
help="remove the old test data",
)
parser.add_argument(
"--add-test-data", type=int, default=0, help="add new test data"
)
parser.add_argument(
"--only-local",
action="store_true",
default=False,
help="no upload including backup",
)
args = parser.parse_args()
delete_test_data = args.clean_test_data
add_test_data = args.add_test_data
use_cache = not args.no_cache
only_local = args.only_local
root_dir = args.local_dir
caffe2_zoo_dir = os.path.join(root_dir, ".caffe2", "models")
onnx_zoo_dir = os.path.join(root_dir, ".onnx", "models")
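    # For each model: fetch both versions, regenerate the ONNX model from the
    # Caffe2 nets, verify it against test data, and upload the refreshed entry.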
    for onnx_model_name, c2_model_name in model_mapping.items():
print(
f"####### Processing ONNX model {onnx_model_name} ({c2_model_name} in Caffe2) #######"
)
download_caffe2_model(c2_model_name, caffe2_zoo_dir, use_cache=use_cache)
download_onnx_model(
onnx_model_name, onnx_zoo_dir, use_cache=use_cache, only_local=only_local
)
onnx_model_dir = os.path.join(onnx_zoo_dir, onnx_model_name)
if delete_test_data:
print("Deleting all the existing test data...")
# NB: For now, we don't delete the npz files.
# for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
# os.remove(f)
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
shutil.rmtree(f)
onnx_model, c2_init_net, c2_predict_net = caffe2_to_onnx(
c2_model_name, os.path.join(caffe2_zoo_dir, c2_model_name)
)
print(f"Deleteing old ONNX {onnx_model_name} model...")
for f in glob.glob(os.path.join(onnx_model_dir, "model*".format())):
os.remove(f)
print(f"Serializing generated ONNX {onnx_model_name} model ...")
with open(os.path.join(onnx_model_dir, "model.onnx"), "wb") as file:
file.write(onnx_model.SerializeToString())
print(f"Verifying model {onnx_model_name} with ONNX model checker...")
onnx.checker.check_model(onnx_model)
total_existing_data_set = 0
print(f"Verifying model {onnx_model_name} with existing test data...")
for f in glob.glob(os.path.join(onnx_model_dir, "*.npz")):
            test_data = np.load(f, encoding="bytes", allow_pickle=True)
inputs = list(test_data["inputs"])
ref_outputs = list(test_data["outputs"])
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
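        # Protobuf test data: test_data_set_* directories of serialized TensorProtos.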
for f in glob.glob(os.path.join(onnx_model_dir, "test_data_set*")):
inputs = []
inputs_num = len(glob.glob(os.path.join(f, "input_*.pb")))
for i in range(inputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"input_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
inputs.append(numpy_helper.to_array(tensor))
ref_outputs = []
ref_outputs_num = len(glob.glob(os.path.join(f, "output_*.pb")))
for i in range(ref_outputs_num):
tensor = onnx.TensorProto()
with open(os.path.join(f, f"output_{i}.pb"), "rb") as pf:
tensor.ParseFromString(pf.read())
ref_outputs.append(numpy_helper.to_array(tensor))
onnx_verify(onnx_model, inputs, ref_outputs)
total_existing_data_set += 1
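        # Continue numbering new data sets after the existing test_data_set_* dirs.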
starting_index = 0
while os.path.exists(
os.path.join(onnx_model_dir, f"test_data_set_{starting_index}")
):
starting_index += 1
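        # If the model has no test data and none was requested, generate 3 sets by default.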
if total_existing_data_set == 0 and add_test_data == 0:
add_test_data = 3
total_existing_data_set = 3
print(f"Generating {add_test_data} sets of new test data...")
for i in range(starting_index, add_test_data + starting_index):
data_dir = os.path.join(onnx_model_dir, f"test_data_set_{i}")
os.makedirs(data_dir)
inputs = generate_test_input_data(onnx_model, 255)
ref_outputs = generate_test_output_data(c2_init_net, c2_predict_net, inputs)
onnx_verify(onnx_model, inputs, ref_outputs)
for index, input in enumerate(inputs):
tensor = numpy_helper.from_array(input[1])
with open(os.path.join(data_dir, f"input_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
for index, output in enumerate(ref_outputs):
tensor = numpy_helper.from_array(output)
with open(os.path.join(data_dir, f"output_{index}.pb"), "wb") as file:
file.write(tensor.SerializeToString())
del onnx_model
del c2_init_net
del c2_predict_net
upload_onnx_model(
onnx_model_name, onnx_zoo_dir, backup=False, only_local=only_local
)
print("\n\n")