// torch/csrc/jit/serialization/pickler.h - platform/external/pytorch - Git at Google

 #pragma once

 #include <ATen/core/qualified_name.h>
 #include <string>
 #include <utility>
 #include <vector>

 #include <ATen/Utils.h>
 #include <ATen/core/ivalue.h>
 #include <ATen/core/jit_type.h>
 #include <c10/util/ArrayRef.h>
 #include <c10/util/FbcodeMaps.h>
 #include <c10/util/string_view.h>
 #include <torch/csrc/Export.h>

 namespace torch {
 namespace jit {

 // Opcode bytes of the pickle format. Python's pickletools.py gives a detailed
 // description of every code. Within each group the entries are ordered by
 // their byte value.
 enum class PickleOpCode : char {
   // Opcodes listed ahead of the protocol 2 additions
   MARK = '(',
   EMPTY_TUPLE = ')',
   STOP = '.',
   POP = '0',
   POP_MARK = '1',
   DUP = '2',
   FLOAT = 'F',
   BINFLOAT = 'G',
   INT = 'I',
   BININT = 'J',
   BININT1 = 'K',
   LONG = 'L',
   BININT2 = 'M',
   NONE = 'N',
   PERSID = 'P',
   BINPERSID = 'Q',
   REDUCE = 'R',
   STRING = 'S',
   BINSTRING = 'T',
   SHORT_BINSTRING = 'U',
   // NB: Avoid using UNICODE as it is a macro in the Windows API
   UNICODE_ = 'V',
   BINUNICODE = 'X',
   EMPTY_LIST = ']',
   APPEND = 'a',
   BUILD = 'b',
   GLOBAL = 'c',
   DICT = 'd',
   APPENDS = 'e',
   GET = 'g',
   BINGET = 'h',
   INST = 'i',
   LONG_BINGET = 'j',
   LIST = 'l',
   OBJ = 'o',
   PUT = 'p',
   BINPUT = 'q',
   LONG_BINPUT = 'r',
   SETITEM = 's',
   TUPLE = 't',
   SETITEMS = 'u',
   EMPTY_DICT = '}',

   // Protocol 2
   PROTO = static_cast<char>('\x80'),
   NEWOBJ = '\x81',
   EXT1 = '\x82',
   EXT2 = '\x83',
   EXT4 = '\x84',
   TUPLE1 = '\x85',
   TUPLE2 = '\x86',
   TUPLE3 = '\x87',
   NEWTRUE = '\x88',
   NEWFALSE = '\x89',
   LONG1 = '\x8a',
   LONG4 = '\x8b',

   // Protocol 3 (Python 3.x)
   BINBYTES = 'B',
   SHORT_BINBYTES = 'C',

   // Protocol 4
   SHORT_BINUNICODE = static_cast<char>('\x8c'),
   BINUNICODE8 = '\x8d',
   BINBYTES8 = '\x8e',
   EMPTY_SET = '\x8f',
   ADDITEMS = '\x90',
   FROZENSET = '\x91',
   NEWOBJ_EX = '\x92',
   STACK_GLOBAL = '\x93',
   MEMOIZE = '\x94',
   FRAME = '\x95',
 };

 using ::c10::IValue;

 // Read-only handle on the bytes to be written out for one tensor.
 // Instances are populated by getWriteableTensorData(), which is declared a
 // friend so that it can assign the private members.
 struct WriteableTensorData {
   // Pointer to the start of the data held by the tensor's storage.
   const char* data() const {
     return static_cast<const char*>(tensor_.storage().data());
   }
   // The recorded size (size_); 0 until getWriteableTensorData() sets it.
   size_t sizeInBytes() const {
     return size_;
   }
   // Number of bytes reported by the tensor's storage.
   size_t nbytes() const {
     return tensor_.storage().nbytes();
   }
   // True when the storage's data pointer carries a non-null context.
   bool storageHasDeleter() const {
     return tensor_.storage().data_ptr().get_context() != nullptr;
   }

  private:
   friend TORCH_API WriteableTensorData
   getWriteableTensorData(const at::Tensor& tensor, bool to_cpu);
   at::Tensor tensor_;
   // Value-initialized so that sizeInBytes() on a default-constructed object
   // never reads an indeterminate value.
   uint64_t size_{0};
 };

 // Setter/getter pair for a "type tags" flag. Only the declarations are
 // visible in this header.
 // NOTE(review): presumably this controls whether type tags are emitted while
 // pickling -- confirm against the definitions.
 void setTypeTags(bool state);
 bool getTypeTags();

 // Writes IValues out as a pickle byte stream. Encoded bytes are staged in a
 // fixed-size internal buffer (buffer_) and handed to the caller-supplied
 // writer callback whenever that buffer is flushed. Most member functions are
 // only declared here; their definitions are not part of this header.
 class TORCH_API Pickler {
   // Pickler objects cannot be copied or copy-assigned.
   AT_DISALLOW_COPY_AND_ASSIGN(Pickler);

  public:
   // Convenience constructor: forwards `writer` to the full constructor with a
   // null tensor table, an empty type renamer and a null class-type list;
   // get_tensor_id and tag_aggregates take their default values.
   Pickler(std::function<void(const char*, size_t)> writer)
       : Pickler(std::move(writer), nullptr, nullptr, nullptr) {}

   // Full constructor.
   //   writer               - callback receiving (pointer, length) chunks of
   //                          output bytes
   //   tensor_table         - optional external table of tensors; see the
   //                          comment on tensor_table_
   //   type_renamer         - maps a class type to the qualified name to use
   //   memoized_class_types - optional list that receives the class types
   //                          that were written
   //   get_tensor_id        - optional callback producing an id for a tensor's
   //                          storage; see the comment on get_tensor_id_
   //   tag_aggregates       - see the comment on tag_aggregates_
   // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
   Pickler(
       std::function<void(const char*, size_t)> writer,
       std::vector<at::Tensor>* tensor_table,
       std::function<c10::QualifiedName(const c10::ClassTypePtr&)> type_renamer,
       std::vector<c10::ClassTypePtr>* memoized_class_types,
       std::function<std::string(const at::Tensor&)> get_tensor_id = nullptr,
       bool tag_aggregates = true)
       : writer_(std::move(writer)),
         tensor_table_(tensor_table),
         type_renamer_(std::move(type_renamer)),
         memoized_class_types_(memoized_class_types),
         get_tensor_id_(std::move(get_tensor_id)),
         tag_aggregates_(tag_aggregates) {}
   // NOLINTNEXTLINE(bugprone-exception-escape)
   ~Pickler();

   // Push protocol onto the stack
   void protocol();

   // Push STOP PickleOpCode onto the stack
   void stop();

   // Entry point for serializing a single IValue.
   void pushIValue(const IValue& ivalue);

   void startTuple();
   void endTuple();

   // Accessor for tensor_data_ (see the comment on that member).
   const std::vector<at::Tensor>& tensorData() {
     return tensor_data_;
   }

   void pushEmptyDict();
   void pushDict(const IValue& ivalue);
   void pushInt(int64_t value);
   void pushLong(const std::string& data);

  private:
   // Per-kind serialization helpers; declared here, defined out of line.
   void pushIValueImpl(const IValue& ivalue);
   void startTypeTag();
   void endTypeTag(const IValue& value);
   void pushBool(bool value);
   void pushDouble(double value);
   void pushComplexDouble(const IValue& value);
   void pushGenericList(const IValue& ivalue);
   void pushIntList(const IValue& ivalue);
   void pushList(const IValue& ivalue);
   void pushTensor(const IValue& ivalue);
   void pushTensorReference(const IValue& ivalue);
   void pushLiteralTensor(const IValue& ivalue);
   void pushLiteralSparseTensor(const at::Tensor& tensor);
   void pushTuple(const IValue& ivalue);
   void pushString(const std::string& string);
   void pushDevice(const IValue& ivalue);
 #ifdef USE_DISTRIBUTED
   void pushRRef(const IValue& ivalue);
 #endif
   // unmemoized version
   void pushStringImpl(const std::string& string);
   void pushStorageOfTensor(const at::Tensor& tensor);

   void pushBinGet(uint32_t memo_id);
   void pushSpecializedList(
       const IValue& ivalue,
       const char* list_name,
       const std::function<void(const IValue&)>& item_pusher);
   void pushGlobal(c10::string_view module_name, c10::string_view class_name);
   // raw string data is appended directly to the byte stream
   void pushBytes(const std::string& string);
   void pushTensorData(const at::Tensor& tensor);

   // Add a BINPUT op and return the memoization id used
   size_t pushNextBinPut();

   const void* getPointer(const IValue& ivalue);

   // Caller checks that bufferPos_ > 0
   // Hands the first bufferPos_ bytes of buffer_ to writer_ and marks the
   // buffer as empty again.
   void flushNonEmpty() {
     writer_(buffer_.data(), bufferPos_);
     bufferPos_ = 0;
   }

   // Flushes the buffer only when it holds at least one byte.
   void flush() {
     if (bufferPos_ != 0) {
       flushNonEmpty();
     }
   }

   // These convert values to bytes and add them to the stack (NB: since T is to
   // the left of a '::', its type cannot be deduced by the compiler so one must
   // explicitly instantiate the template, i.e. push<int>(int) works, push(int)
   // does not)
   static CONSTEXPR_EXCEPT_WIN_CUDA size_t kBufferSize = 256;
   template <typename T>
   void push(typename std::common_type<T>::type value) {
     const char* begin = reinterpret_cast<const char*>(&value);
     // Not enough room left for sizeof(T) more bytes: write out what is
     // buffered first. bufferPos_ is necessarily > 0 here because a single T
     // always fits into an empty buffer (see the static_assert below).
     if (bufferPos_ + sizeof(T) > buffer_.size()) {
       flushNonEmpty();
     }
     static_assert(sizeof(T) <= kBufferSize, "Buffer size assumption");
     // Append the raw object representation of `value` to the buffer.
     memcpy(buffer_.data() + bufferPos_, begin, sizeof(T));
     bufferPos_ += sizeof(T);
   }

   // Stream to write binary data to
   // Code shouldn't call writer_ directly without first flush()ing.
   std::function<void(const char*, size_t)> writer_;

   // Buffer to avoid calling a writer_ on a per-byte basis.
   std::array<char, kBufferSize> buffer_;
   // Number of bytes of buffer_ currently in use.
   size_t bufferPos_{0};

   // Stack of opcodes/data
   std::vector<char> stack_;

   // External table of tensors to serialize. If this is missing, then tensors
   // are serialized directly into the pickle
   std::vector<at::Tensor>* tensor_table_;

   // TODO: only use this if necessary (add a pass to find all shared ivalues,
   // and only memoize those)
   uint32_t memo_id_ = 0;

   // Memoization of IValues that have been written (index in table is used for
   // BINPUT opcodes) to enable shared references
   c10::FastMap<const void*, uint32_t> memoized_ivalue_map_;

   // because we de-dup ivalues based on their raw pointer address in the above
   // map we need to keep all the memoized values alive during the pickle.
   // Otherwise, it is possible that a raw address gets reused for another
   // object, and we will alias it to the old object at that address.
   std::vector<IValue> memoized_ivalues_;

   // Callback mapping a class type to the qualified name to use for it.
   std::function<c10::QualifiedName(const c10::ClassTypePtr&)> type_renamer_;

   // List of all the types that it wrote, inspect from the IValues it wrote.
   std::vector<c10::ClassTypePtr>* memoized_class_types_;

   // Function to grab next id_name for tensor storage, function is responsible
   // for returning unique ids
   std::function<std::string(const at::Tensor&)> get_tensor_id_;

   // List of tensor storages to serialize in the same binary as the pickle data
   // similar to ivalues, they are memoized using BINPUT
   std::vector<at::Tensor> tensor_data_;
   c10::FastMap<const void*, uint32_t> memoized_storage_map_;

   // Memo ids keyed by string content, one table per kind of value.
   c10::FastMap<std::string, uint32_t> memoized_globals_map_;
   c10::FastMap<std::string, uint32_t> memoized_strings_map_;
   c10::FastMap<std::string, uint32_t> memoized_devices_map_;
   // when true, List and Dict objects will be wrapped in a
   // torch.jit._pickle.restore_type_tag call to correctly set the dynamic
   // TorchScript type for the object. When true the thing unpickling must have
   // torch installed.
   bool tag_aggregates_;
 };

 // Returns a (tensor, record_size) for a tensor, converting it to a CPU tensor
 // if it was CUDA and to_cpu is true. The result is read through the
 // WriteableTensorData accessors.
 TORCH_API WriteableTensorData
 getWriteableTensorData(const at::Tensor& tensor, bool to_cpu = true);

 // Returns the value of the tensor's storage pointer.
 // (Declaration only; the definition is not part of this header.)
 uint64_t getStorageKey(const at::Tensor& tensor);

 // If `cls` has __getstate__/__setstate__, asserts that they have the right
 // schema and returns true; otherwise returns false.
 bool checkHasValidSetGetState(const std::shared_ptr<c10::ClassType>& cls);

 // Builds the map of tensor metadata that is serialized alongside `t`.
 // At present only the `conj` and `neg` bits are covered.
 inline std::unordered_map<std::string, bool> getTensorMetadata(
     const at::Tensor& t) {
   // `ZeroTensor` is not public facing yet, so serializing one is rejected.
   TORCH_CHECK(
       !t._is_zerotensor(),
       "ZeroTensor is not serializable,",
       " please file an issue if required.");

   // A bit is recorded only when it differs from its default (unset) state.
   std::unordered_map<std::string, bool> result;
   if (t.is_conj()) {
     result.emplace("conj", true);
   }
   if (t.is_neg()) {
     result.emplace("neg", true);
   }
   return result;
 }

 // Applies a metadata map (see getTensorMetadata) to tensor `t`.
 //
 // Recognized keys are "conj" and "neg". The mapped bool is what gets passed
 // to t._set_conj / t._set_neg, so an entry whose value is false does not
 // switch the bit on. Any other key fails a TORCH_CHECK.
 inline void setTensorMetadata(
     const at::Tensor& t,
     std::unordered_map<std::string, bool> metadata) {
   for (const auto& key_value_pair : metadata) {
     const std::string& key = key_value_pair.first;
     if (key == "conj") {
       t._set_conj(key_value_pair.second);
     } else if (key == "neg") {
       t._set_neg(key_value_pair.second);
     } else {
       TORCH_CHECK(
           false,
           "Unexpected key `",
           key,
           "` passed to setTensorMetadata.");
     }
   }
 }

 // Applies tensor metadata supplied as an IValue dictionary.
 // NOTE: This overload is required by unpickler.cpp
 //
 // Every entry is converted to a (std::string, bool) pair -- keys are read
 // with toStringRef() and values with toBool() -- and the resulting map is
 // handed to the std::unordered_map overload of setTensorMetadata.
 inline void setTensorMetadata(
     const at::Tensor& t,
     c10::Dict<c10::IValue, c10::IValue> metadata_idict) {
   std::unordered_map<std::string, bool> metadata;
   for (auto& pair : metadata_idict) {
     // toStringRef() exposes the key's std::string in place, so no temporary
     // copy of the string-holding object is made per entry.
     metadata[pair.key().toStringRef()] = pair.value().toBool();
   }
   setTensorMetadata(t, std::move(metadata));
 }

 } // namespace jit
 } // namespace torch
	#pragma once

	#include <ATen/core/qualified_name.h>
	#include <string>
	#include <utility>
	#include <vector>

	#include <ATen/Utils.h>
	#include <ATen/core/ivalue.h>
	#include <ATen/core/jit_type.h>
	#include <c10/util/ArrayRef.h>
	#include <c10/util/FbcodeMaps.h>
	#include <c10/util/string_view.h>
	#include <torch/csrc/Export.h>

	namespace torch {
	namespace jit {

	// Opcode bytes of the pickle format. Python's pickletools.py gives a detailed
	// description of every code. Within each group the entries are ordered by
	// their byte value.
	enum class PickleOpCode : char {
	  // Opcodes listed ahead of the protocol 2 additions
	  MARK = '(',
	  EMPTY_TUPLE = ')',
	  STOP = '.',
	  POP = '0',
	  POP_MARK = '1',
	  DUP = '2',
	  FLOAT = 'F',
	  BINFLOAT = 'G',
	  INT = 'I',
	  BININT = 'J',
	  BININT1 = 'K',
	  LONG = 'L',
	  BININT2 = 'M',
	  NONE = 'N',
	  PERSID = 'P',
	  BINPERSID = 'Q',
	  REDUCE = 'R',
	  STRING = 'S',
	  BINSTRING = 'T',
	  SHORT_BINSTRING = 'U',
	  // NB: Avoid using UNICODE as it is a macro in the Windows API
	  UNICODE_ = 'V',
	  BINUNICODE = 'X',
	  EMPTY_LIST = ']',
	  APPEND = 'a',
	  BUILD = 'b',
	  GLOBAL = 'c',
	  DICT = 'd',
	  APPENDS = 'e',
	  GET = 'g',
	  BINGET = 'h',
	  INST = 'i',
	  LONG_BINGET = 'j',
	  LIST = 'l',
	  OBJ = 'o',
	  PUT = 'p',
	  BINPUT = 'q',
	  LONG_BINPUT = 'r',
	  SETITEM = 's',
	  TUPLE = 't',
	  SETITEMS = 'u',
	  EMPTY_DICT = '}',

	  // Protocol 2
	  PROTO = static_cast<char>('\x80'),
	  NEWOBJ = '\x81',
	  EXT1 = '\x82',
	  EXT2 = '\x83',
	  EXT4 = '\x84',
	  TUPLE1 = '\x85',
	  TUPLE2 = '\x86',
	  TUPLE3 = '\x87',
	  NEWTRUE = '\x88',
	  NEWFALSE = '\x89',
	  LONG1 = '\x8a',
	  LONG4 = '\x8b',

	  // Protocol 3 (Python 3.x)
	  BINBYTES = 'B',
	  SHORT_BINBYTES = 'C',

	  // Protocol 4
	  SHORT_BINUNICODE = static_cast<char>('\x8c'),
	  BINUNICODE8 = '\x8d',
	  BINBYTES8 = '\x8e',
	  EMPTY_SET = '\x8f',
	  ADDITEMS = '\x90',
	  FROZENSET = '\x91',
	  NEWOBJ_EX = '\x92',
	  STACK_GLOBAL = '\x93',
	  MEMOIZE = '\x94',
	  FRAME = '\x95',
	};

	using ::c10::IValue;

	// Read-only handle on the bytes to be written out for one tensor.
	// Instances are populated by getWriteableTensorData(), which is declared a
	// friend so that it can assign the private members.
	struct WriteableTensorData {
	  // Pointer to the start of the data held by the tensor's storage.
	  const char* data() const {
	    return static_cast<const char*>(tensor_.storage().data());
	  }
	  // The recorded size (size_); 0 until getWriteableTensorData() sets it.
	  size_t sizeInBytes() const {
	    return size_;
	  }
	  // Number of bytes reported by the tensor's storage.
	  size_t nbytes() const {
	    return tensor_.storage().nbytes();
	  }
	  // True when the storage's data pointer carries a non-null context.
	  bool storageHasDeleter() const {
	    return tensor_.storage().data_ptr().get_context() != nullptr;
	  }

	 private:
	  friend TORCH_API WriteableTensorData
	  getWriteableTensorData(const at::Tensor& tensor, bool to_cpu);
	  at::Tensor tensor_;
	  // Value-initialized so that sizeInBytes() on a default-constructed object
	  // never reads an indeterminate value.
	  uint64_t size_{0};
	};

	// Setter/getter pair for a "type tags" flag. Only the declarations are
	// visible in this header.
	// NOTE(review): presumably this controls whether type tags are emitted while
	// pickling -- confirm against the definitions.
	void setTypeTags(bool state);
	bool getTypeTags();

	// Writes IValues out as a pickle byte stream. Encoded bytes are staged in a
	// fixed-size internal buffer (buffer_) and handed to the caller-supplied
	// writer callback whenever that buffer is flushed. Most member functions are
	// only declared here; their definitions are not part of this header.
	class TORCH_API Pickler {
	// Pickler objects cannot be copied or copy-assigned.
	AT_DISALLOW_COPY_AND_ASSIGN(Pickler);

	public:
	// Convenience constructor: forwards `writer` to the full constructor with a
	// null tensor table, an empty type renamer and a null class-type list;
	// get_tensor_id and tag_aggregates take their default values.
	Pickler(std::function<void(const char*, size_t)> writer)
	: Pickler(std::move(writer), nullptr, nullptr, nullptr) {}

	// Full constructor.
	//   writer               - callback receiving (pointer, length) chunks of
	//                          output bytes
	//   tensor_table         - optional external table of tensors; see the
	//                          comment on tensor_table_
	//   type_renamer         - maps a class type to the qualified name to use
	//   memoized_class_types - optional list that receives the class types
	//                          that were written
	//   get_tensor_id        - optional callback producing an id for a tensor's
	//                          storage; see the comment on get_tensor_id_
	//   tag_aggregates       - see the comment on tag_aggregates_
	// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
	Pickler(
	std::function<void(const char*, size_t)> writer,
	std::vector<at::Tensor>* tensor_table,
	std::function<c10::QualifiedName(const c10::ClassTypePtr&)> type_renamer,
	std::vector<c10::ClassTypePtr>* memoized_class_types,
	std::function<std::string(const at::Tensor&)> get_tensor_id = nullptr,
	bool tag_aggregates = true)
	: writer_(std::move(writer)),
	tensor_table_(tensor_table),
	type_renamer_(std::move(type_renamer)),
	memoized_class_types_(memoized_class_types),
	get_tensor_id_(std::move(get_tensor_id)),
	tag_aggregates_(tag_aggregates) {}
	// NOLINTNEXTLINE(bugprone-exception-escape)
	~Pickler();

	// Push protocol onto the stack
	void protocol();

	// Push STOP PickleOpCode onto the stack
	void stop();

	// Entry point for serializing a single IValue.
	void pushIValue(const IValue& ivalue);

	void startTuple();
	void endTuple();

	// Accessor for tensor_data_ (see the comment on that member).
	const std::vector<at::Tensor>& tensorData() {
	return tensor_data_;
	}

	void pushEmptyDict();
	void pushDict(const IValue& ivalue);
	void pushInt(int64_t value);
	void pushLong(const std::string& data);

	private:
	// Per-kind serialization helpers; declared here, defined out of line.
	void pushIValueImpl(const IValue& ivalue);
	void startTypeTag();
	void endTypeTag(const IValue& value);
	void pushBool(bool value);
	void pushDouble(double value);
	void pushComplexDouble(const IValue& value);
	void pushGenericList(const IValue& ivalue);
	void pushIntList(const IValue& ivalue);
	void pushList(const IValue& ivalue);
	void pushTensor(const IValue& ivalue);
	void pushTensorReference(const IValue& ivalue);
	void pushLiteralTensor(const IValue& ivalue);
	void pushLiteralSparseTensor(const at::Tensor& tensor);
	void pushTuple(const IValue& ivalue);
	void pushString(const std::string& string);
	void pushDevice(const IValue& ivalue);
	#ifdef USE_DISTRIBUTED
	void pushRRef(const IValue& ivalue);
	#endif
	// unmemoized version
	void pushStringImpl(const std::string& string);
	void pushStorageOfTensor(const at::Tensor& tensor);

	void pushBinGet(uint32_t memo_id);
	void pushSpecializedList(
	const IValue& ivalue,
	const char* list_name,
	const std::function<void(const IValue&)>& item_pusher);
	void pushGlobal(c10::string_view module_name, c10::string_view class_name);
	// raw string data is appended directly to the byte stream
	void pushBytes(const std::string& string);
	void pushTensorData(const at::Tensor& tensor);

	// Add a BINPUT op and return the memoization id used
	size_t pushNextBinPut();

	const void* getPointer(const IValue& ivalue);

	// Caller checks that bufferPos_ > 0
	// Hands the first bufferPos_ bytes of buffer_ to writer_ and marks the
	// buffer as empty again.
	void flushNonEmpty() {
	writer_(buffer_.data(), bufferPos_);
	bufferPos_ = 0;
	}

	// Flushes the buffer only when it holds at least one byte.
	void flush() {
	if (bufferPos_ != 0) {
	flushNonEmpty();
	}
	}

	// These convert values to bytes and add them to the stack (NB: since T is to
	// the left of a '::', its type cannot be deduced by the compiler so one must
	// explicitly instantiate the template, i.e. push<int>(int) works, push(int)
	// does not)
	static CONSTEXPR_EXCEPT_WIN_CUDA size_t kBufferSize = 256;
	template <typename T>
	void push(typename std::common_type<T>::type value) {
	const char* begin = reinterpret_cast<const char*>(&value);
	// Not enough room left for sizeof(T) more bytes: write out what is
	// buffered first. bufferPos_ is necessarily > 0 here because a single T
	// always fits into an empty buffer (see the static_assert below).
	if (bufferPos_ + sizeof(T) > buffer_.size()) {
	flushNonEmpty();
	}
	static_assert(sizeof(T) <= kBufferSize, "Buffer size assumption");
	// Append the raw object representation of `value` to the buffer.
	memcpy(buffer_.data() + bufferPos_, begin, sizeof(T));
	bufferPos_ += sizeof(T);
	}

	// Stream to write binary data to
	// Code shouldn't call writer_ directly without first flush()ing.
	std::function<void(const char*, size_t)> writer_;

	// Buffer to avoid calling a writer_ on a per-byte basis.
	std::array<char, kBufferSize> buffer_;
	// Number of bytes of buffer_ currently in use.
	size_t bufferPos_{0};

	// Stack of opcodes/data
	std::vector<char> stack_;

	// External table of tensors to serialize. If this is missing, then tensors
	// are serialized directly into the pickle
	std::vector<at::Tensor>* tensor_table_;

	// TODO: only use this if necessary (add a pass to find all shared ivalues,
	// and only memoize those)
	uint32_t memo_id_ = 0;

	// Memoization of IValues that have been written (index in table is used for
	// BINPUT opcodes) to enable shared references
	c10::FastMap<const void*, uint32_t> memoized_ivalue_map_;

	// because we de-dup ivalues based on their raw pointer address in the above
	// map we need to keep all the memoized values alive during the pickle.
	// Otherwise, it is possible that a raw address gets reused for another
	// object, and we will alias it to the old object at that address.
	std::vector<IValue> memoized_ivalues_;

	// Callback mapping a class type to the qualified name to use for it.
	std::function<c10::QualifiedName(const c10::ClassTypePtr&)> type_renamer_;

	// List of all the types that it wrote, inspect from the IValues it wrote.
	std::vector<c10::ClassTypePtr>* memoized_class_types_;

	// Function to grab next id_name for tensor storage, function is responsible
	// for returning unique ids
	std::function<std::string(const at::Tensor&)> get_tensor_id_;

	// List of tensor storages to serialize in the same binary as the pickle data
	// similar to ivalues, they are memoized using BINPUT
	std::vector<at::Tensor> tensor_data_;
	c10::FastMap<const void*, uint32_t> memoized_storage_map_;

	// Memo ids keyed by string content, one table per kind of value.
	c10::FastMap<std::string, uint32_t> memoized_globals_map_;
	c10::FastMap<std::string, uint32_t> memoized_strings_map_;
	c10::FastMap<std::string, uint32_t> memoized_devices_map_;
	// when true, List and Dict objects will be wrapped in a
	// torch.jit._pickle.restore_type_tag call to correctly set the dynamic
	// TorchScript type for the object. When true the thing unpickling must have
	// torch installed.
	bool tag_aggregates_;
	};

	// Returns a (tensor, record_size) for a tensor, converting it to a CPU tensor
	// if it was CUDA and to_cpu is true. The result is read through the
	// WriteableTensorData accessors.
	TORCH_API WriteableTensorData
	getWriteableTensorData(const at::Tensor& tensor, bool to_cpu = true);

	// Returns the value of the tensor's storage pointer.
	// (Declaration only; the definition is not part of this header.)
	uint64_t getStorageKey(const at::Tensor& tensor);

	// If `cls` has __getstate__/__setstate__, asserts that they have the right
	// schema and returns true; otherwise returns false.
	bool checkHasValidSetGetState(const std::shared_ptr<c10::ClassType>& cls);

	// Builds the map of tensor metadata that is serialized alongside `t`.
	// At present only the `conj` and `neg` bits are covered.
	inline std::unordered_map<std::string, bool> getTensorMetadata(
	    const at::Tensor& t) {
	  // `ZeroTensor` is not public facing yet, so serializing one is rejected.
	  TORCH_CHECK(
	      !t._is_zerotensor(),
	      "ZeroTensor is not serializable,",
	      " please file an issue if required.");

	  // A bit is recorded only when it differs from its default (unset) state.
	  std::unordered_map<std::string, bool> result;
	  if (t.is_conj()) {
	    result.emplace("conj", true);
	  }
	  if (t.is_neg()) {
	    result.emplace("neg", true);
	  }
	  return result;
	}

	// Applies a metadata map (see getTensorMetadata) to tensor `t`.
	//
	// Recognized keys are "conj" and "neg". The mapped bool is what gets passed
	// to t._set_conj / t._set_neg, so an entry whose value is false does not
	// switch the bit on. Any other key fails a TORCH_CHECK.
	inline void setTensorMetadata(
	    const at::Tensor& t,
	    std::unordered_map<std::string, bool> metadata) {
	  for (const auto& key_value_pair : metadata) {
	    const std::string& key = key_value_pair.first;
	    if (key == "conj") {
	      t._set_conj(key_value_pair.second);
	    } else if (key == "neg") {
	      t._set_neg(key_value_pair.second);
	    } else {
	      TORCH_CHECK(
	          false,
	          "Unexpected key `",
	          key,
	          "` passed to setTensorMetadata.");
	    }
	  }
	}

	// Applies tensor metadata supplied as an IValue dictionary.
	// NOTE: This overload is required by unpickler.cpp
	//
	// Every entry is converted to a (std::string, bool) pair -- keys are read
	// with toStringRef() and values with toBool() -- and the resulting map is
	// handed to the std::unordered_map overload of setTensorMetadata.
	inline void setTensorMetadata(
	    const at::Tensor& t,
	    c10::Dict<c10::IValue, c10::IValue> metadata_idict) {
	  std::unordered_map<std::string, bool> metadata;
	  for (auto& pair : metadata_idict) {
	    // toStringRef() exposes the key's std::string in place, so no temporary
	    // copy of the string-holding object is made per entry.
	    metadata[pair.key().toStringRef()] = pair.value().toBool();
	  }
	  setTensorMetadata(t, std::move(metadata));
	}

	} // namespace jit
	} // namespace torch