| ## @package embedding_generation_benchmark |
| # Module caffe2.python.embedding_generation_benchmark |
| |
| |
| |
| |
| |
| from caffe2.proto import caffe2_pb2 |
| from caffe2.python import workspace, core, utils, model_helper |
| |
| import argparse |
| import numpy as np |
| import time |
| |
| import logging |
| |
# Module-wide logger for the benchmark; DEBUG level so every progress
# message emitted below is shown.
log = logging.getLogger("embedding_generation_benchmark")
log.setLevel(logging.DEBUG)
# Configure root logging with the library defaults.
logging.basicConfig()
| |
| |
def generate_data(T, batch_size, max_seq_length):
    '''
    Fill a queue with input data

    Creates a blobs queue named "inputqueue" with capacity T and enqueues
    T identical batches into it. Each batch is an integer array of shape
    (max_seq_length, batch_size) whose every column is
    0, 1, ..., max_seq_length - 1.

    Args:
        T: number of batches to enqueue (also the queue capacity).
        batch_size: number of columns in each batch.
        max_seq_length: number of rows (positions) in each batch.

    Returns:
        The queue blob returned by CreateBlobsQueue.
    '''
    log.info("Generating T={} batches".format(T))

    generate_input_init_net = core.Net('generate_input_init')
    queue = generate_input_init_net.CreateBlobsQueue(
        [], "inputqueue", num_blobs=1, capacity=T,
    )
    workspace.RunNetOnce(generate_input_init_net)

    generate_input_net = core.Net('generate_input')
    generate_input_net.EnqueueBlobs([queue, "scratch"], ["scratch"])
    # No random numbers are drawn in this function; the seed is kept so the
    # global numpy RNG state seen by later code is unchanged.
    np.random.seed(2603)

    # The batch contents and the progress-report interval do not depend on
    # the loop index, so compute them once instead of on every iteration.
    X = np.tile(np.arange(max_seq_length), [batch_size, 1]).transpose()
    report_every = max(10, T // 10)

    for t in range(T):
        if t % report_every == 0:
            log.info("Generating data {}/{}".format(t, T))
        # "scratch" is re-fed before every enqueue, exactly as before.
        # NOTE(review): presumably EnqueueBlobs consumes the blob contents,
        # which is why the feed must stay inside the loop - confirm.
        workspace.FeedBlob("scratch", X)
        workspace.RunNetOnce(generate_input_net.Proto())

    log.info("Finished data generation")
    return queue
| |
| |
def generate_embedding_table(vocab_size, embedding_size):
    '''
    Create and fill the 'embedding_table' blob.

    Builds a one-op net that runs GaussianFill with shape
    [vocab_size, embedding_size], executes it once in the workspace and
    returns the resulting blob reference.
    '''
    dims = [vocab_size, embedding_size]
    log.info("Generating embedding table with dimensions {}".format(dims))

    table_net = core.Net('generate_table')
    embedding_blob = table_net.GaussianFill(
        [], ['embedding_table'], shape=dims,
    )
    workspace.RunNetOnce(table_net)

    return embedding_blob
| |
| |
def create_model(args, queue, embedding_table, embedding_size):
    '''
    Build the benchmark model.

    The net dequeues one blob ('input_data') from `queue` and writes
    'output' using one of two ops:

    * args.implementation == 'sinusoid': SinusoidPositionEncoding with the
      given embedding_size.
    * anything else: Gather from `embedding_table` indexed by the input.

    Returns the ModelHelper instance that owns the net.
    '''
    bench_model = model_helper.ModelHelper(name='embedding_generation_bench')
    net = bench_model.net
    indices = net.DequeueBlobs(queue, 'input_data')

    # Table lookup is the fallback for every non-'sinusoid' value.
    if args.implementation != 'sinusoid':
        net.Gather([embedding_table, indices], ['output'])
        return bench_model

    net.SinusoidPositionEncoding(
        [indices], ['output'], embedding_size=embedding_size,
    )
    return bench_model
| |
| |
def Caffe2EmbeddingGeneration(args):
    '''
    Run the embedding generation benchmark.

    Generates T = data_size // batch_size input batches, builds the model
    selected by args.implementation, runs one warm-up iteration and then
    the remaining T - 1 iterations in chunks of args.iters_to_report,
    logging the throughput after every chunk.

    Args:
        args: namespace with data_size, batch_size, seq_length,
            embedding_size, iters_to_report and implementation attributes.

    Returns:
        Wall-clock seconds elapsed since the benchmark loop started (the
        warm-up iteration is excluded).

    Raises:
        ValueError: if data_size // batch_size is less than 1, i.e. there
            is not even one batch for the warm-up run to dequeue.
    '''
    T = args.data_size // args.batch_size
    if T < 1:
        # With no batches the input queue stays empty, so the warm-up run
        # below would have nothing to dequeue. Fail early and clearly.
        raise ValueError(
            "data_size // batch_size must be >= 1 "
            "(got data_size={}, batch_size={})".format(
                args.data_size, args.batch_size
            )
        )

    queue = generate_data(T, args.batch_size, args.seq_length)

    embedding_table = None
    if args.implementation == 'table':
        # Inputs are positions 0..seq_length-1, so seq_length rows suffice.
        embedding_table = generate_embedding_table(
            args.seq_length,
            args.embedding_size,
        )

    model = create_model(args, queue, embedding_table, args.embedding_size)

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    net_name = model.net.Proto().name

    num_iters = T

    # Run the Benchmark
    log.info("------ Warming up ------")
    workspace.RunNet(net_name)

    log.info("------ Starting benchmark ------")
    start_time = time.time()
    last_time = start_time
    # The warm-up consumed one batch, so iteration counting starts at 1.
    for iteration in range(1, num_iters, args.iters_to_report):
        iters_once = min(args.iters_to_report, num_iters - iteration)
        workspace.RunNet(net_name, iters_once)

        new_time = time.time()
        log.info(
            "Iter: {} / {}. Embeddings Generated Per Second: {}k.".format(
                iteration,
                num_iters,
                # "// 100 / 10" turns the rate into thousands, truncated
                # to one decimal place.
                (iters_once * args.batch_size * args.seq_length) /
                (new_time - last_time) // 100 / 10,
            )
        )
        last_time = new_time

    total_per_sec = (num_iters - 1) * args.batch_size * args.seq_length
    total_per_sec = total_per_sec / (time.time() - start_time) // 100 / 10

    log.info(
        "Done. Total embeddings generated per second "
        "excluding 1st iteration: {}k".format(total_per_sec)
    )

    return time.time() - start_time
| |
| |
@utils.debug
def Benchmark(args):
    '''
    Entry point for the benchmark.

    Delegates to Caffe2EmbeddingGeneration and returns its result. The
    function is wrapped by utils.debug, whose behaviour is defined in
    caffe2.python.utils and is not visible in this file.
    '''
    elapsed_seconds = Caffe2EmbeddingGeneration(args)
    return elapsed_seconds
| |
| |
def GetArgumentParser():
    '''
    Build the command-line parser for the benchmark.

    Options (all optional):
        --embedding_size   int, default 512
        --batch_size       int, default 16
        --data_size        int, default 10000
        --seq_length       int, default 128
        --iters_to_report  int, default 20
        --implementation   'table' or 'sinusoid', default 'sinusoid'

    Returns:
        The configured argparse.ArgumentParser.
    '''
    parser = argparse.ArgumentParser(
        description="Embedding generation benchmark."
    )

    parser.add_argument(
        "--embedding_size",
        type=int,
        default=512,
        help="Embedding size",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=16,
        help="The batch size."
    )
    parser.add_argument(
        "--data_size",
        type=int,
        default=10000,
        help="Number of sequences to generate"
    )
    parser.add_argument(
        "--seq_length",
        type=int,
        default=128,
        help="Max sequence length"
    )
    parser.add_argument(
        "--iters_to_report",
        type=int,
        default=20,
        help="Number of iterations to report progress"
    )
    parser.add_argument(
        "--implementation",
        type=str,
        default="sinusoid",
        # Only these two values are supported by the benchmark. Any other
        # value used to select the table-lookup op without a table ever
        # being generated, so reject it at parse time instead.
        choices=["table", "sinusoid"],
        help="'table' or 'sinusoid'",
    )
    return parser
| |
| |
if __name__ == '__main__':
    # Options the parser does not recognise are forwarded to Caffe2's
    # global initialisation unchanged.
    args, extra_args = GetArgumentParser().parse_known_args()

    init_argv = [
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0',
    ]
    init_argv.extend(extra_args)
    workspace.GlobalInit(init_argv)

    # Run the whole benchmark under a CPU device scope.
    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
        Benchmark(args)