| // Copyright 2020 Google LLC |
| // |
| // This source code is licensed under the BSD-style license found in the |
| // LICENSE file in the root directory of this source tree. |
| |
| #pragma once |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include <xnnpack.h> |
| #include <xnnpack/common.h> |
| #include <xnnpack/cache.h> |
| #include <xnnpack/node-type.h> |
| |
| #if defined(EMSCRIPTEN) |
| #include <emscripten/emscripten.h> |
| #elif XNN_PLATFORM_WINDOWS |
| #include <windows.h> |
| #else |
| #include <time.h> |
| #endif |
| |
| #define XNN_MAX_INPUTS 4 |
| #define XNN_MAX_OUTPUTS 4 |
| |
| #define XNN_MAX_RUNTIME_INPUTS 4 |
| #define XNN_MAX_RUNTIME_OUTPUTS 4 |
| |
| #define XNN_INVALID_NODE_ID UINT32_MAX |
| |
| #define XNN_MAX_OPERATOR_OBJECTS 4 |
| |
/// Disable fusion of nodes in the subgraph. Fusion is enabled by default; set this flag to turn it off.
| #define XNN_FLAG_NO_OPERATOR_FUSION 0x80000000 |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| struct xnn_shape { |
| size_t num_dims; |
| size_t dim[XNN_MAX_TENSOR_DIMS]; |
| }; |
| |
| enum xnn_value_type { |
| xnn_value_type_invalid = 0, |
| xnn_value_type_dense_tensor = 1, |
| }; |
| |
| enum xnn_layout_type { |
| xnn_layout_type_nhwc = 0, |
| xnn_layout_type_nchw = 1, |
| }; |
| |
/// Abstraction for a collection of elements produced and consumed by nodes.
| struct xnn_value { |
| /// Unique ID for the value. |
| uint32_t id; |
| /// Type of the collection of elements. |
| /// |
| /// Currently only dense tensors are supported. |
| /// Other types (e.g. sparse tensors) might be supported in the future. |
| enum xnn_value_type type; |
| /// Type of elements in the collection. |
| enum xnn_datatype datatype; |
| /// Per-value quantization parameters. |
| struct { |
| /// Offset from zero of the quantized elements. |
| int32_t zero_point; |
| union { |
| /// Multiplication factor to convert quantized elements to real representation. |
| float scale; |
| struct { |
| /// Per-channel multiplication factor to convert quantized elements to real representation. |
| const float* channelwise_scale; |
| /// Index of the channel dimension with per-channel quantization parameters. |
| size_t channel_dimension; |
| }; |
| }; |
| } quantization; |
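// Note (illustrative): for affine quantization, a quantized element q represents the real
// value (q - zero_point) * scale; with per-channel quantization, the scale for channel c
// is channelwise_scale[c], where c indexes the channel_dimension axis.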
| /// Tensor shape. |
| struct xnn_shape shape; |
| /// Binary features of the tensor. Supported values are any combination of: |
| /// - XNN_VALUE_FLAG_EXTERNAL_INPUT |
| /// - XNN_VALUE_FLAG_EXTERNAL_OUTPUT |
| uint32_t flags; |
| /// Static initialization data. Must be null for non-static values. |
| const void* data; |
/// Index of the Subgraph node that produced the value, or XNN_INVALID_NODE_ID if the Value is an external input.
| uint32_t producer; |
/// Index of the first Node that consumes the value, or XNN_INVALID_NODE_ID if the Value has no consumers within the
| /// graph (e.g. Value is an external output). |
| uint32_t first_consumer; |
| /// Number of Nodes that consume the value. |
/// If multiple inputs in a Node refer to this Value as input, the Node is counted as a consumer multiple times.
| /// If the Value is an external output, it counts as having an extra consumer. |
| uint32_t num_consumers; |
/// Number of consumers of this Value that can operate in NCHW layout (counted during NCHW/sparse-inference analysis).
uint32_t num_nchw_compatible_consumers;
/// Layout (NHWC or NCHW) selected for this Value.
enum xnn_layout_type layout;
| /// Set during analysis in xnn_subgraph_rewrite_for_fp16. |
| /// Indicates that this value should be converted to FP16. |
| bool fp16_compatible; |
| /// Set during analysis in xnn_subgraph_rewrite_for_fp16. |
| /// Indicates Value ID of the FP16 variant of this Value. |
| uint32_t fp16_id; |
| /// Set during analysis in xnn_subgraph_rewrite_for_fp16. |
| /// Indicates Value ID of the FP32 variant of this Value. |
| uint32_t fp32_id; |
| }; |
| |
| |
| XNN_INLINE bool xnn_value_is_external(const struct xnn_value* value) { |
| return (value->flags & (XNN_VALUE_FLAG_EXTERNAL_INPUT | XNN_VALUE_FLAG_EXTERNAL_OUTPUT)) != 0; |
| } |
| |
| XNN_INLINE bool xnn_value_is_external_output(const struct xnn_value* value) { |
| return (value->flags & XNN_VALUE_FLAG_EXTERNAL_OUTPUT) != 0; |
| } |
| |
| XNN_INLINE bool xnn_value_is_external_input(const struct xnn_value* value) { |
| return (value->flags & XNN_VALUE_FLAG_EXTERNAL_INPUT) != 0; |
| } |
| |
| enum xnn_allocation_type { |
| xnn_allocation_type_invalid = 0, |
/// Static data provided by the caller; it needs to outlive the xnn_runtime.
| xnn_allocation_type_static, |
| /// Lives in XNNPACK-managed internal workspace. |
| xnn_allocation_type_workspace, |
/// Non-static data that is external to the runtime, provided by the caller and specified in xnn_setup_runtime.
| xnn_allocation_type_external, |
| }; |
| |
| struct xnn_blob { |
| /// Size in bytes. |
| size_t size; |
| /// Data pointer. |
| void* data; |
| enum xnn_allocation_type allocation_type; |
| }; |
| |
| struct xnn_node; |
| struct xnn_operator_data; |
| |
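/// Creates the operator object(s) for a Node, storing them in opdata.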
| typedef enum xnn_status (*xnn_create_operator_fn)( |
| const struct xnn_node* node, |
| const struct xnn_value* values, |
| size_t num_values, |
| struct xnn_operator_data* opdata, |
| const struct xnn_caches* caches); |
| |
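/// Sets up a previously created operator: binds the runtime's blobs and threadpool to the operator object(s) in opdata.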
| typedef enum xnn_status (*xnn_setup_operator_fn)( |
| const struct xnn_operator_data* opdata, |
| const struct xnn_blob* blobs, |
| size_t num_blobs, |
| pthreadpool_t threadpool); |
| |
| enum xnn_compute_type { |
| xnn_compute_type_invalid = 0, |
| xnn_compute_type_fp32, |
| xnn_compute_type_fp16, |
| xnn_compute_type_qc8, |
| xnn_compute_type_qs8, |
| xnn_compute_type_qu8, |
| xnn_compute_type_fp32_to_fp16, |
| xnn_compute_type_fp32_to_qs8, |
| xnn_compute_type_fp32_to_qu8, |
| xnn_compute_type_fp16_to_fp32, |
| xnn_compute_type_qs8_to_fp32, |
| xnn_compute_type_qu8_to_fp32, |
| }; |
| |
| struct xnn_node { |
| enum xnn_node_type type; |
| uint32_t id; |
| enum xnn_compute_type compute_type; |
| /// Static parameters of the operator node. |
| union { |
| struct { |
| uint32_t input_padding_top; |
| uint32_t input_padding_right; |
| uint32_t input_padding_bottom; |
| uint32_t input_padding_left; |
| uint32_t kernel_height; |
| uint32_t kernel_width; |
| uint32_t subsampling_height; |
| uint32_t subsampling_width; |
| uint32_t dilation_height; |
| uint32_t dilation_width; |
| uint32_t groups; |
| size_t group_input_channels; |
| size_t group_output_channels; |
| } convolution_2d; |
| struct { |
| uint32_t padding_top; |
| uint32_t padding_right; |
| uint32_t padding_bottom; |
| uint32_t padding_left; |
| uint32_t adjustment_height; |
| uint32_t adjustment_width; |
| uint32_t kernel_height; |
| uint32_t kernel_width; |
| uint32_t upsampling_height; |
| uint32_t upsampling_width; |
| uint32_t dilation_height; |
| uint32_t dilation_width; |
| uint32_t groups; |
| size_t group_input_channels; |
| size_t group_output_channels; |
| } deconvolution_2d; |
| struct { |
| uint32_t input_padding_top; |
| uint32_t input_padding_right; |
| uint32_t input_padding_bottom; |
| uint32_t input_padding_left; |
| uint32_t kernel_height; |
| uint32_t kernel_width; |
| uint32_t subsampling_height; |
| uint32_t subsampling_width; |
| uint32_t dilation_height; |
| uint32_t dilation_width; |
| uint32_t depth_multiplier; |
| size_t input_channels; |
| } depthwise_convolution_2d; |
| struct { |
| uint32_t block_size; |
| } depth_to_space; |
| struct { |
| uint32_t padding_top; |
| uint32_t padding_right; |
| uint32_t padding_bottom; |
| uint32_t padding_left; |
| uint32_t pooling_height; |
| uint32_t pooling_width; |
| uint32_t stride_height; |
| uint32_t stride_width; |
| uint32_t dilation_height; |
| uint32_t dilation_width; |
| } pooling_2d; |
| struct { |
| float alpha; |
| } elu; |
| struct { |
| float negative_slope; |
| } leaky_relu; |
| struct { |
| size_t pre_paddings[XNN_MAX_TENSOR_DIMS]; |
| size_t post_paddings[XNN_MAX_TENSOR_DIMS]; |
| uint32_t padding_value; |
| } static_pad; |
| struct { |
| struct xnn_shape new_shape; |
| } static_reshape; |
| struct { |
| size_t new_height; |
| size_t new_width; |
| } static_resize; |
| struct { |
| size_t axis; |
| } concatenate; |
| struct { |
| size_t axis; |
| } even_split; |
| struct { |
| size_t perm[XNN_MAX_TENSOR_DIMS]; |
| size_t num_dims; |
| } transpose; |
| } params; |
| struct { |
| float output_min; |
| float output_max; |
| } activation; |
| /// Value IDs for node inputs. |
| uint32_t inputs[XNN_MAX_INPUTS]; |
| uint32_t num_inputs; |
| /// Value IDs for node outputs. |
| uint32_t outputs[XNN_MAX_OUTPUTS]; |
| uint32_t num_outputs; |
| uint32_t flags; |
| uint32_t layout_flags; |
| uint32_t cluster_leader; |
| // Number of filter parameters in all 1x1 Convolutions of the sparse cluster. |
| // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions. |
| size_t num_params; |
| // Number of zero filter parameters in all 1x1 Convolutions of the sparse cluster. |
| // This value is properly initialized only in sparse inference analysis of 1x1 Convolutions. |
| size_t num_zeroes; |
| // Factory function to create an operator object from the node. |
| xnn_create_operator_fn create; |
// Function to set up an operator using opdata.
| xnn_setup_operator_fn setup; |
| }; |
| |
| #ifdef __MACH__ |
| typedef uint64_t xnn_timestamp; |
| #elif __EMSCRIPTEN__ |
| typedef double xnn_timestamp; |
| #elif XNN_PLATFORM_WINDOWS |
| typedef LARGE_INTEGER xnn_timestamp; |
| #else |
| typedef struct timespec xnn_timestamp; |
| #endif |
| |
| struct xnn_operator_data { |
| xnn_operator_t operator_objects[XNN_MAX_OPERATOR_OBJECTS]; |
| xnn_setup_operator_fn setup; |
| size_t batch_size; |
| size_t input_height; |
| size_t input_width; |
| size_t output_height; |
| size_t output_width; |
| struct xnn_shape shape1; |
| struct xnn_shape shape2; |
| size_t pre_paddings[XNN_MAX_TENSOR_DIMS]; |
| size_t post_paddings[XNN_MAX_TENSOR_DIMS]; |
| uint32_t adjustment_height; |
| uint32_t adjustment_width; |
| uint32_t inputs[XNN_MAX_RUNTIME_INPUTS]; |
| uint32_t outputs[XNN_MAX_RUNTIME_OUTPUTS]; |
| xnn_timestamp end_ts[XNN_MAX_OPERATOR_OBJECTS]; |
| }; |
| |
| struct xnn_subgraph { |
/// Number of Value IDs reserved for communication with the external graph representation.
/// Values created during subgraph transformation avoid using IDs in the [0, external_value_ids-1] range.
| uint32_t external_value_ids; |
| |
| uint32_t num_reserved_values; |
| uint32_t num_values; |
| struct xnn_value* values; |
| |
| uint32_t num_reserved_nodes; |
| uint32_t num_nodes; |
| struct xnn_node* nodes; |
| }; |
| |
| /// Runtime is a combination of an execution plan for subgraph Nodes and a memory manager for subgraph Values. |
| struct xnn_runtime { |
| uint32_t num_external_values; |
| |
| /// List of operators in the execution plan, in execution order. |
| struct xnn_operator_data* opdata; |
| /// Number of operators in the execution plan. |
| size_t num_ops; |
| |
| struct xnn_blob* blobs; |
| size_t num_blobs; |
| |
| struct xnn_workspace* workspace; |
| struct xnn_runtime* next_workspace_user; |
| |
| #if XNN_PLATFORM_JIT |
| struct xnn_code_cache code_cache; |
| #endif // XNN_PLATFORM_JIT |
| |
| pthreadpool_t threadpool; |
| |
| bool profiling; |
| // The start timestamp of the first operator in the subgraph. This is set when profiling is true. |
| xnn_timestamp start_ts; |
| }; |
| |
| struct xnn_value* xnn_subgraph_new_internal_value(xnn_subgraph_t subgraph); |
| |
| struct xnn_node* xnn_subgraph_new_node(xnn_subgraph_t subgraph); |
| |
| void xnn_subgraph_add_nodes(xnn_subgraph_t subgraph, size_t num_nodes); |
| |
| size_t xnn_tensor_get_size( |
| xnn_subgraph_t subgraph, |
| uint32_t value_id); |
| |
| // Product of all shape dimensions |
| size_t xnn_shape_multiply_all_dims( |
| const struct xnn_shape shape[1]); |
| |
// Product of all shape dimensions, except for the specified number of trailing (non-batch) dimensions
| size_t xnn_shape_multiply_batch_dims( |
| const struct xnn_shape shape[1], size_t num_nonbatch_dims); |
| |
| // Product of all shape dimensions, except for the last (channel) one |
| size_t xnn_shape_multiply_non_channel_dims( |
| const struct xnn_shape shape[1]); |
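
// Illustrative example (not normative): for a shape with num_dims == 4 and dim == {2, 3, 4, 5},
//   xnn_shape_multiply_all_dims               -> 2*3*4*5 == 120
//   xnn_shape_multiply_batch_dims(shape, 1)   -> 2*3*4   == 24 (all but the last dimension)
//   xnn_shape_multiply_non_channel_dims       -> 2*3*4   == 24 (all but the channel dimension)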
| |
| enum xnn_status xnn_subgraph_optimize(xnn_subgraph_t subgraph, uint32_t flags); |
| |
| void xnn_subgraph_rewrite_for_nchw(xnn_subgraph_t subgraph); |
// Rewrites the subgraph for FP16; returns true on success, false if the rewrite failed.
| bool xnn_subgraph_rewrite_for_fp16(xnn_subgraph_t subgraph); |
| |
| void xnn_node_clear(struct xnn_node* node); |
| void xnn_value_clear(struct xnn_value* value); |
| |
| void xnn_value_copy(struct xnn_value* dst_value, const struct xnn_value* src_value); |
| |
| void xnn_init_convert_node( |
| struct xnn_node* node, |
| enum xnn_compute_type compute_type, |
| uint32_t input_id, |
| uint32_t output_id, |
| uint32_t flags); |
| |
| struct xnn_workspace { |
| void* data; |
| size_t size; |
| struct xnn_runtime* first_user; |
// The workspace is destroyed in xnn_delete_runtime or xnn_delete_workspace when ref_count reaches 0.
| size_t ref_count; |
| }; |
| |
| #ifdef __cplusplus |
| } // extern "C" |
| #endif |