// blob: 861a6c5d43740214a32e97b22c17da5d0dbf1b33
syntax = "proto2";
package caffe2;
// A few notes about Caffe2's protobuf conventions:
// (1) Most objects are registered by their types, such as operators and nets.
// For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extension because that used to create quite some conflicts
// in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
// is mainly for backward compatibility purposes but we may consider using
// those in the future.
// TensorProto is the serialized form of a Tensor object.
message TensorProto {
  // Size of each dimension of the tensor.
  repeated int64 dims = 1;

  // Element types that a tensor can hold.
  enum DataType {
    UNDEFINED = 0;

    // --- Basic types ---
    // float
    FLOAT = 1;
    // int
    INT32 = 2;
    // byte; restored as uint8 when deserialized
    BYTE = 3;
    // string
    STRING = 4;

    // --- Less commonly used data types ---
    // bool
    BOOL = 5;
    // uint8_t
    UINT8 = 6;
    // int8_t
    INT8 = 7;
    // uint16_t
    UINT16 = 8;
    // int16_t
    INT16 = 9;
    // int64_t
    INT64 = 10;
    // (No entry in this enum uses the value 11.)
    // at::Half
    FLOAT16 = 12;
    // double
    DOUBLE = 13;
    // zero-collision hash state
    ZERO_COLLISION_HASH = 14;
    // rebatching buffer
    REBATCHING_BUFFER = 15;
  }

  // Element type of the tensor data after deserialization.
  optional DataType data_type = 2 [default = FLOAT];

  // Layout used for the serialized data.
  enum SerializationFormat {
    // The serialization format that predates the data_format field. Most
    // data types go into the typed protobuf fields, although in some cases
    // raw little-endian data is stored in the byte_data field instead.
    FMT_PROTOBUF = 0;
    // bfloat16 data, stored in the raw_data field.
    FMT_BFLOAT16 = 1;
  }

  // Holds a SerializationFormat enum value. It is deliberately declared as
  // an integer so that an old message with no data_format value can be told
  // apart from a new message carrying a SerializationFormat value we do not
  // understand; with an enum-typed field protobuf would deserialize both
  // cases the same way.
  optional uint32 data_format = 15 [default = 0];

  // Payload for float tensors.
  repeated float float_data = 3 [packed = true];

  // Payload for int32, uint8, int8, uint16, int16, bool, and float16.
  // float16: in storage the values are converted byte-wise to unsigned short
  // and then stored in this field.
  // int8 / uint8: storing them here unfortunately produces larger serialized
  // data than necessary, because protobuf's varint encoding needs 2 bytes
  // for int8 and uint8 values that have the MSB set.
  repeated int32 int32_data = 4 [packed = true];

  // Payload for bytes.
  optional bytes byte_data = 5;

  // Payload for strings.
  repeated bytes string_data = 6;

  // Payload for double.
  repeated double double_data = 9 [packed = true];

  // Payload for int64.
  repeated int64 int64_data = 10 [packed = true];

  // Raw data; the contents are serialized as little-endian.
  optional bytes raw_data = 13;

  // Optional name of the tensor.
  optional string name = 7;

  // Optional details about the device the tensor was serialized from. Useful
  // in cases such as snapshotting a whole workspace in a multi-GPU
  // environment.
  optional DeviceOption device_detail = 8;

  // When loading from chunks, indicates where this data belongs in the full
  // array. When it is not used, the full data has to be present.
  message Segment {
    required int64 begin = 1;
    required int64 end = 2;
  }
  optional Segment segment = 11;

  // Field numbers 12 and 14 were previously used for now-deprecated fields.
  // The matching reservation is kept here in commented-out form:
  // reserved 12, 14;
}
// QTensorProto: presumably the serialized form of a quantized tensor, going
// by its scale/bias and multi-group quantization fields -- TODO confirm.
message QTensorProto {
  repeated int64 dims = 1;
  required int32 precision = 2;
  required double scale = 3;
  required double bias = 4;
  required bool is_signed = 5;
  repeated int32 data = 6 [packed = true];
  optional string name = 7;
  optional TensorProto.DataType data_type = 8 [default = INT32];

  // Parameters for multi-group quantization.
  repeated double scales = 9;
  repeated double biases = 10;

  // Needed for multi-group quantization: the dimension along which the
  // "group wise quantization" is done.
  optional int32 axis = 11;

  // Should be true when this is a multi-group quantization proto.
  optional bool is_multiparam = 12 [default = false];
}
// TensorProtos bundles several TensorProto objects into one proto. This is
// handy for small tensors; for anything big, consider using a DB for storage.
message TensorProtos {
  repeated TensorProto protos = 1;
}
// Dimensions and element type of a tensor, with optional markers for
// unknown dimensions or an entirely unknown shape.
message TensorShape {
  repeated int64 dims = 1;
  optional TensorProto.DataType data_type = 2 [default = FLOAT];
  repeated int32 unknown_dims = 3;
  optional bool unknown_shape = 4 [default = false];
  optional string name = 5;
}
// A list of TensorShape entries.
message TensorShapes {
  repeated TensorShape shapes = 1;
}
// TensorBoundShape records the result of bound shape inference for one
// tensor:
//   - shape is the inferred shape of the tensor;
//   - dim_type holds the DimType of every dimension.
// For example, for dimension i, shape.dims[i] is the inferred size and
// dim_type[i] is the corresponding DimType.
message TensorBoundShape {
  optional TensorShape shape = 1;

  enum DimType {
    // unknown
    UNKNOWN = 0;
    // constant
    CONSTANT = 1;
    // batch: the corresponding dimension is batch_size
    BATCH = 2;
    // batch_of_feature_max: the corresponding shape is
    // inferred_feature_length * batch_size
    BATCH_OF_FEATURE_MAX = 3;
    // batch_of_feature_max_default: the corresponding shape is
    // default_feature_length * batch_size
    BATCH_OF_FEATURE_MAX_DEFAULT = 4;
    // feature_max: the corresponding shape is inferred_feature_length
    FEATURE_MAX = 5;
    // feature_max_default: the corresponding shape is default_feature_length
    FEATURE_MAX_DEFAULT = 6;
  }

  // One entry per dimension: dim_type.size() == shape.dims.size()
  repeated DimType dim_type = 2;

  optional string name = 3;

  // Flag telling whether the shape is final and cannot be changed,
  // e.g. the input/output of in-place ops.
  optional bool shape_is_final = 4;
}
// A list of TensorBoundShape entries plus two optional limits.
message TensorBoundShapes {
  repeated TensorBoundShape shapes = 1;
  optional int64 max_batch_size = 2;
  optional int64 max_feature_len = 3;
}
// AOTConfig: three required settings (batch size limit, sequence size limit,
// in-batch broadcast flag) plus two optional onnxifi-related settings.
// NOTE(review): "AOT" presumably stands for ahead-of-time -- confirm.
message AOTConfig {
  required int64 max_batch_size = 1;
  required int64 max_seq_size = 2;
  required bool in_batch_broadcast = 3;
  optional string onnxifi_blacklist_ops = 4;
  optional int32 onnxifi_min_ops = 5;
}
// Argument is a named value. It carries either singular float, integer and
// string values, or repeated float, int and string arrays; fields for
// tensors, nets and quantized tensors are declared as well.
message Argument {
  optional string name = 1;

  // Singular values.
  optional float f = 2;
  optional int64 i = 3;
  optional bytes s = 4;
  optional TensorProto t = 10;
  optional NetDef n = 8;

  // Repeated values.
  repeated float floats = 5;
  repeated int64 ints = 6;
  repeated bytes strings = 7;
  repeated TensorProto tensors = 11;
  repeated NetDef nets = 9;
  repeated QTensorProto qtensors = 12;
}
// Device types currently supported by Caffe2.
// Note: when adding a device type, also add the corresponding device line to
// the DeviceTypeName() function in caffe2/utils/proto_utils.cc and update
// c10/core/DeviceType.h.
enum DeviceTypeProto {
  // CPU; this is what is used by default.
  PROTO_CPU = 0;
  // CUDA.
  PROTO_CUDA = 1;
  // Reserved for explicit MKLDNN.
  PROTO_MKLDNN = 2;
  // OpenGL.
  PROTO_OPENGL = 3;
  // OpenCL.
  PROTO_OPENCL = 4;
  // IDEEP.
  PROTO_IDEEP = 5;
  // AMD HIP.
  PROTO_HIP = 6;
  // FPGA.
  PROTO_FPGA = 7;
  // ONNX Runtime.
  PROTO_ORT = 8;
  // XLA / TPU.
  PROTO_XLA = 9;
  // MPS.
  PROTO_MPS = 10;

  // Change the following number if you add more devices in the code.
  PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 11;
}
// Device-specific options. DeviceOption protos are not distinguished per
// DeviceType, so currently all devices share this one proto. Fields that are
// specific to a device type are ignored if the type does not match.
// Note: when adding fields to DeviceOption, make the corresponding changes
// to the IsSameDevice() function in utils/proto_utils.{h,cc}.
message DeviceOption {
  // [general] Options that need to be carried out before running the
  // execution. The enum-typed form of this field is left commented out:
  // optional DeviceType device_type = 1 [ default = CPU ];
  // 0 is CPU.
  optional int32 device_type = 1 [default = 0];

  // [general] Together with device_type, identifies the exact device.
  optional int32 device_id = 2;

  // [general] Seed with which to start the device random number generator.
  optional uint32 random_seed = 3;

  // [general] The node this op should execute on. Used for net
  // transformation purposes. Must be empty at execution time.
  optional string node_name = 4;

  // [CPU and Linux specific] NUMA node id.
  optional int32 numa_node_id = 5;

  // [general] Extra information passed along; currently not used at
  // execution time.
  repeated string extra_info = 6;
}
// Definition of a single operator.
message OperatorDef {
  // Names of the input blobs.
  repeated string input = 1;

  // Names of the output top blobs.
  repeated string output = 2;

  // The operator name. This is optional.
  optional string name = 3;

  // The operator type. Needed to create the object from the operator
  // registry.
  optional string type = 4;

  // Arguments defined in the operator schema.
  repeated Argument arg = 5;

  // Device option under which the operator should run.
  optional DeviceOption device_option = 6;

  // Optional engine selection, for when several implementations are
  // available simultaneously for one device type. If an engine is specified
  // but does not exist in the compiled Caffe2 binary, Caffe2 falls back to
  // the default engine of that device type.
  optional string engine = 7;

  // Additional 'fake' inputs that express control dependencies in the
  // operator graph. They can ensure that an operator does not run until
  // another operator is ready, e.g. for scheduling control. They are not
  // passed as actual inputs to the Operator implementation; only the Net
  // class uses them, for scheduling purposes.
  repeated string control_input = 8;

  // Only a hint for shape inference; has no runtime significance.
  optional bool is_gradient_op = 9 [default = false];

  // Debug information associated with the construction of the operator.
  // An optional string with no assumed characteristics, since operators can
  // be constructed in any language.
  optional string debug_info = 10;

  // Domain of the operator; helps the runtime tell which operator library
  // this OperatorDef refers to. For example, both caffe2 and aten have an
  // `Add` operator, and the domain makes it easy to decide which one to
  // execute. To support multiple operator libraries:
  // - "caffe2" means this uses the Caffe2 operator library
  // - "aten" means this uses the ATen operator library
  // - "c10" is for the fused library
  // - a missing or empty domain is treated as "caffe2"; this is for legacy
  //   models, and a new serializer should always export an OperatorDef with
  //   domain and op_version
  optional string domain = 11;

  // Operator version information. Each operator has its own version number,
  // which is bumped every time the API or semantics of the operator change.
  // The runtime system should check the op_version of each OperatorDef and
  // decide whether to reject or accept the model.
  optional int64 op_version = 12;
}
// MapFieldEntry is a string key/value pair, following the pattern for maps
// that work across proto versions.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message MapFieldEntry {
  required string key = 1;
  required string val = 2;
}
// Holds options that are specific to one backend.
message BackendOptions {
  // Name of the backend the options below apply to.
  required string backend_name = 1;

  // Flexible map used to pass in the options.
  repeated MapFieldEntry option = 2;
}
// Definition of a partition.
message PartitionInfo {
  // Name of the partition.
  required string name = 1;

  // List of logical device IDs on which this partition can be executed.
  // An empty list means the partition does not run on a device but on the
  // host CPU instead.
  repeated int32 device_id = 2;

  // Extra debug info.
  optional string extra_info = 3;

  // Flexible map for passing backend-specific options.
  repeated BackendOptions backend_options = 4;
}
// Definition of a network.
message NetDef {
  // The network's name.
  optional string name = 1;

  // Operators contained in the network.
  // Note: the field is not named "operator" because that is a reserved word
  // in C++.
  repeated OperatorDef op = 2;

  // Type of network the net should be run with. It routes the network
  // instantiation to different execution modes. The default mode, "simple",
  // runs the operators sequentially, as the original Caffe implementation
  // does.
  optional string type = 3;

  // Number of workers, for when the operators in the network are to be
  // carried out in parallel.
  // Note: this is to be deprecated; use the arg field with "num_workers" as
  // the key instead.
  // Note 2: the old uses of this were never actually cleaned up.
  optional int32 num_workers = 4;

  // Device option for the network. If the network has a specific device
  // option and one of its operators does not have one set, the network's
  // device option is copied over to the operator. This basically avoids
  // having to put device options on every operator.
  optional DeviceOption device_option = 5;

  repeated Argument arg = 6;

  // Two optional fields declaring the external inputs and outputs of a net.
  // If both are set, then when a net is created every op is sanity checked
  // for whether its input is declared (either as an external input, or as an
  // intermediate blob created by one of the ops), and all blobs in
  // external_output are sanity checked for being produced.
  //
  // In cases of memory optimization, declaring external_input and
  // external_output also ensures that the storage of these blobs is
  // persistent: for any blob in external_input and external_output, the
  // content after a network run finishes is actually the right content.
  // The contents of any intermediate blobs may be overwritten.
  repeated string external_input = 7;
  repeated string external_output = 8;

  // Partitioning info, indexed by partition names.
  repeated PartitionInfo partition_info = 9;
}
// ExecutionStep is a sort-of-hacky way of simulating iteration right now.
message ExecutionStep {
  // An ExecutionStep should contain either a set of substeps or a set of
  // network names to run in this step. The two should NOT both be set at the
  // same time.
  optional string name = 1;

  // An execution step can be recursive, i.e. consist of a set of substeps.
  repeated ExecutionStep substep = 2;

  // Alternatively, an execution step can involve one or more networks.
  // You cannot have both substeps and networks; choose one.
  // Networks are referred to by name. The actual network definition of the
  // same name should be included in the network field of the plan. The
  // reason is that a network object might hold internal state (think of a
  // data layer), so multiple steps should be able to ask for the same
  // network object to be run.
  repeated string network = 3;

  // Number of iterations to run this step. The specified substeps or
  // networks are run sequentially, and one sequential run counts as one
  // iteration. If unset, the number of iterations is assumed to be 1.
  optional int64 num_iter = 4;

  // The criteria network specifies a single output (TensorCPU<bool>) of
  // size (1). The executor runs it on every iteration, and execution
  // terminates when output[0] is `false`.
  optional string criteria_network = 5 [deprecated = true];

  // DEPRECATED. Use `run_every_ms`.
  optional string report_net = 7;
  optional int32 report_interval = 8;

  // If provided, this step is executed at every time interval (in
  // milliseconds) while its sibling execution steps execute in parallel.
  // The step is guaranteed to run at least once after all non-interval
  // siblings have finished.
  optional int64 run_every_ms = 11;

  // If false or not set, sub-steps are executed serially.
  // If true, all substeps are executed concurrently, each in its own thread.
  optional bool concurrent_substeps = 6;

  // Name of a scalar boolean tensor.
  // ES checks this blob AFTER every substep/subnet. If it is specified and
  // its value is true, ES skips the rest and returns immediately.
  // This means that the report_net and the first step will always be called.
  // Use cases:
  // 1) the first substep stops the rest if a data condition is not met
  // 2) the first substep decides which of the remaining steps should be run
  // 3) external control
  //
  // ** It is the user's responsibility not to put this blob in race
  //    conditions.
  // ** For example when setting this blob in concurrent substeps.
  optional string should_stop_blob = 9;

  // If true, this step is executed only once. This ONLY takes effect when
  // should_stop_blob is used.
  optional bool only_once = 10;

  // Whether to create a child workspace for this step.
  // If yes, the workflow and nets are re-created every time this step runs.
  optional bool create_workspace = 12;

  // Number of copies of the children execution steps to run concurrently.
  optional int32 num_concurrent_instances = 13;
}
// PlanDef groups a set of networks with the execution steps that run them.
message PlanDef {
  optional string name = 1;

  // All the networks that are used in this execution / plan. Networks
  // should be ordered in the way they are executed, i.e. for a layer in a
  // network, all of its input blobs should already have been initialized by
  // the layers or networks defined before it.
  repeated NetDef network = 2;

  repeated ExecutionStep execution_step = 3;
}
// Protobuf format for blobs that are not Tensors. A key stores the type of
// the blob. For example, for a serialized DBProto the type should be
// "DBReader" and the content should be a serialized DBProto object.
message BlobProto {
  optional string name = 1;
  optional string type = 2;
  optional TensorProto tensor = 3;
  optional bytes content = 4;
  optional QTensorProto qtensor = 5;

  // If the blob is not a Tensor and is divided into chunks,
  // content_num_chunks holds the number of chunks the blob was divided into.
  optional int32 content_num_chunks = 6;
  optional int32 content_chunk_id = 7;
}
// Protobuf format used to serialize a DBReader.
message DBReaderProto {
  // Name of the DB object in the workspace.
  optional string name = 1;

  // Source of the DB.
  optional string source = 2;

  // Type of the DB.
  optional string db_type = 3;

  // Current key of the DB, if the DB supports seeking.
  optional string key = 4;
}
// Serialization settings that apply to the blobs matched by blob_name_regex.
message BlobSerializationOptions {
  // This set of options only applies to blobs whose name matches this
  // pattern. An empty blob_name_regex is treated as matching all blobs.
  optional string blob_name_regex = 1;

  // Note:
  // - a chunk_size of 0 means "use the default chunk size". The default
  //   chunk size is controlled by the --caffe2_tensor_chunk_size command
  //   line flag.
  // - a chunk_size of -1 means chunking is disabled and the blob is
  //   serialized in a single chunk.
  optional int64 chunk_size = 2;

  enum FloatFormat {
    // Use the current default serialization format, as chosen by the
    // current version of the code. (At the time of writing this is
    // PROTOBUF.)
    FLOAT_DEFAULT = 0;
    // Store the data in the TensorProto's float_data field.
    FLOAT_PROTOBUF = 1;
    // Serialize float values as bfloat16. Note that this conversion is
    // lossy.
    FLOAT_BFLOAT16 = 2;
  }

  // How to serialize tensors that contain float values.
  optional FloatFormat float_format = 3;
}
// Top-level container for blob serialization settings.
message SerializationOptions {
  // Options to use when serializing blobs. The list is sorted from highest
  // to lowest precedence: when a blob is serialized, the first entry whose
  // blob_name_regex matches the blob name is used.
  repeated BlobSerializationOptions options = 1;
}