caffe2/python/examples/lmdb_create_example.py - platform/external/pytorch - Git at Google

 ## @package lmdb_create_example
 # Module caffe2.python.examples.lmdb_create_example


 import argparse
 import numpy as np

 import lmdb
 from caffe2.proto import caffe2_pb2
 from caffe2.python import workspace, model_helper

 '''
 Simple example to create an lmdb database of random image data and labels.
 This can be used as a skeleton to write your own data import.

 It also runs a dummy-model with Caffe2 that reads the data and
 validates that the checksum is the same.
 '''


 def create_db(output_file):
     """Write 128 random image/label records into an LMDB database.

     Every record is a serialized ``caffe2_pb2.TensorProtos`` with two
     entries: the image (random values of shape ``(3, 64, 32)`` stored in
     ``float_data``) and its label (``j % 10`` stored in ``int32_data``).
     The record key is the row index formatted as ASCII digits.

     Args:
         output_file: Database path handed to ``lmdb.open``.

     Returns:
         The write-side checksum, i.e. the sum over all rows of
         ``np.sum(img_data) * label``.
     """
     print(">>> Write database...")
     LMDB_MAP_SIZE = 1 << 40   # MODIFY
     env = lmdb.open(output_file, map_size=LMDB_MAP_SIZE)

     checksum = 0
     try:
         with env.begin(write=True) as txn:
             for j in range(0, 128):
                 # MODIFY: add your own data reader / creator
                 label = j % 10
                 width = 64
                 height = 32

                 img_data = np.random.rand(3, width, height)
                 # ...

                 # Create TensorProtos
                 tensor_protos = caffe2_pb2.TensorProtos()
                 img_tensor = tensor_protos.protos.add()
                 img_tensor.dims.extend(img_data.shape)
                 img_tensor.data_type = caffe2_pb2.TensorProto.FLOAT
                 img_tensor.float_data.extend(img_data.ravel())

                 label_tensor = tensor_protos.protos.add()
                 label_tensor.data_type = caffe2_pb2.TensorProto.INT32
                 label_tensor.int32_data.append(label)
                 txn.put(
                     '{}'.format(j).encode('ascii'),
                     tensor_protos.SerializeToString()
                 )

                 checksum += np.sum(img_data) * label

                 # Report the number of rows written so far (j is 0-based).
                 rows_written = j + 1
                 if rows_written % 16 == 0:
                     print("Inserted {} rows".format(rows_written))
     finally:
         # Always release the environment, including when a write raises.
         env.close()

     print("Checksum/write: {}".format(int(checksum)))
     return checksum


 def read_db_with_caffe2(db_file, expected_checksum):
     """Read the database back through a Caffe2 net and verify its checksum.

     Adds a ``TensorProtosDBInput`` over ``db_file`` that produces the blobs
     "data" and "label", runs the net for 4 batches of 32 rows, and
     accumulates ``np.sum(image) * label`` over every row fetched.

     Args:
         db_file: Path of the LMDB database to read.
         expected_checksum: Checksum the read pass is compared against.

     Raises:
         AssertionError: If the read checksum is not within 0.1 of
             ``expected_checksum``.
     """
     print(">>> Read database...")
     model = model_helper.ModelHelper(name="lmdbtest")
     batch_size = 32
     num_batches = 4
     model.TensorProtosDBInput(
         [], ["data", "label"], batch_size=batch_size,
         db=db_file, db_type="lmdb")

     checksum = 0

     workspace.RunNetOnce(model.param_init_net)
     workspace.CreateNet(model.net)

     for _ in range(num_batches):
         workspace.RunNet(model.net.Proto().name)

         img_datas = workspace.FetchBlob("data")
         labels = workspace.FetchBlob("label")
         for j in range(batch_size):
             # Accumulate each row in float64 to keep rounding error in the
             # per-row sum well below the comparison tolerance.
             row_sum = np.sum(img_datas[j, :], dtype=np.float64)
             checksum += row_sum * labels[j]

     print("Checksum/read: {}".format(int(checksum)))
     # Compare the absolute difference against the tolerance; the comparison
     # has to sit outside np.abs() so that deviations in either direction
     # are caught.
     assert np.abs(expected_checksum - checksum) < 0.1, \
         "Read/write checksums dont match"


 def main():
     """Parse the command line, write the database, then read it back."""
     arg_parser = argparse.ArgumentParser(description="Example LMDB creation")
     arg_parser.add_argument(
         "--output_file",
         required=True,
         type=str,
         default=None,
         help="Path to write the database to",
     )
     db_path = arg_parser.parse_args().output_file

     # Write first; the returned checksum is what the read pass is checked
     # against.
     write_checksum = create_db(db_path)
     read_db_with_caffe2(db_path, write_checksum)


 # Run the example only when this file is executed as a script.
 if __name__ == '__main__':
     main()
	## @package lmdb_create_example
	# Module caffe2.python.examples.lmdb_create_example





	import argparse
	import numpy as np

	import lmdb
	from caffe2.proto import caffe2_pb2
	from caffe2.python import workspace, model_helper

	'''
	Simple example to create an lmdb database of random image data and labels.
	This can be used as a skeleton to write your own data import.

	It also runs a dummy-model with Caffe2 that reads the data and
	validates that the checksum is the same.
	'''


	def create_db(output_file):
	    """Write 128 random image/label records into an LMDB database.

	    Every record is a serialized ``caffe2_pb2.TensorProtos`` with two
	    entries: the image (random values of shape ``(3, 64, 32)`` stored in
	    ``float_data``) and its label (``j % 10`` stored in ``int32_data``).
	    The record key is the row index formatted as ASCII digits.

	    Args:
	        output_file: Database path handed to ``lmdb.open``.

	    Returns:
	        The write-side checksum, i.e. the sum over all rows of
	        ``np.sum(img_data) * label``.
	    """
	    print(">>> Write database...")
	    LMDB_MAP_SIZE = 1 << 40  # MODIFY
	    env = lmdb.open(output_file, map_size=LMDB_MAP_SIZE)

	    checksum = 0
	    try:
	        with env.begin(write=True) as txn:
	            for j in range(0, 128):
	                # MODIFY: add your own data reader / creator
	                label = j % 10
	                width = 64
	                height = 32

	                img_data = np.random.rand(3, width, height)
	                # ...

	                # Create TensorProtos
	                tensor_protos = caffe2_pb2.TensorProtos()
	                img_tensor = tensor_protos.protos.add()
	                img_tensor.dims.extend(img_data.shape)
	                img_tensor.data_type = caffe2_pb2.TensorProto.FLOAT
	                img_tensor.float_data.extend(img_data.ravel())

	                label_tensor = tensor_protos.protos.add()
	                label_tensor.data_type = caffe2_pb2.TensorProto.INT32
	                label_tensor.int32_data.append(label)
	                txn.put(
	                    '{}'.format(j).encode('ascii'),
	                    tensor_protos.SerializeToString()
	                )

	                checksum += np.sum(img_data) * label

	                # Report the number of rows written so far (j is 0-based).
	                rows_written = j + 1
	                if rows_written % 16 == 0:
	                    print("Inserted {} rows".format(rows_written))
	    finally:
	        # Always release the environment, including when a write raises.
	        env.close()

	    print("Checksum/write: {}".format(int(checksum)))
	    return checksum


	def read_db_with_caffe2(db_file, expected_checksum):
	    """Read the database back through a Caffe2 net and verify its checksum.

	    Adds a ``TensorProtosDBInput`` over ``db_file`` that produces the blobs
	    "data" and "label", runs the net for 4 batches of 32 rows, and
	    accumulates ``np.sum(image) * label`` over every row fetched.

	    Args:
	        db_file: Path of the LMDB database to read.
	        expected_checksum: Checksum the read pass is compared against.

	    Raises:
	        AssertionError: If the read checksum is not within 0.1 of
	            ``expected_checksum``.
	    """
	    print(">>> Read database...")
	    model = model_helper.ModelHelper(name="lmdbtest")
	    batch_size = 32
	    num_batches = 4
	    model.TensorProtosDBInput(
	        [], ["data", "label"], batch_size=batch_size,
	        db=db_file, db_type="lmdb")

	    checksum = 0

	    workspace.RunNetOnce(model.param_init_net)
	    workspace.CreateNet(model.net)

	    for _ in range(num_batches):
	        workspace.RunNet(model.net.Proto().name)

	        img_datas = workspace.FetchBlob("data")
	        labels = workspace.FetchBlob("label")
	        for j in range(batch_size):
	            # Accumulate each row in float64 to keep rounding error in the
	            # per-row sum well below the comparison tolerance.
	            row_sum = np.sum(img_datas[j, :], dtype=np.float64)
	            checksum += row_sum * labels[j]

	    print("Checksum/read: {}".format(int(checksum)))
	    # Compare the absolute difference against the tolerance; the comparison
	    # has to sit outside np.abs() so that deviations in either direction
	    # are caught.
	    assert np.abs(expected_checksum - checksum) < 0.1, \
	        "Read/write checksums dont match"


	def main():
	    """Parse the command line, write the database, then read it back."""
	    parser = argparse.ArgumentParser(
	        description="Example LMDB creation"
	    )
	    parser.add_argument("--output_file", type=str, default=None,
	                        help="Path to write the database to",
	                        required=True)

	    args = parser.parse_args()
	    # The checksum returned by the write pass is what the read pass is
	    # checked against.
	    checksum = create_db(args.output_file)

	    # For testing reading:
	    read_db_with_caffe2(args.output_file, checksum)


	# Run the example only when this file is executed as a script.
	if __name__ == '__main__':
	    main()