// This schema uses proto2 syntax; singular fields carry an explicit
// `optional` label.
syntax = "proto2";

// Package that scopes the messages declared in this file.
package caffe2;

// Hierarchical Softmax protobuf convention:
// The HSM operator requires a hierarchy of vocabulary words in the form of a
// tree from the user. This tree is expressed using the proto format.
// TreeProto points to the root NodeProto, which can recursively contain
// children NodeProtos (internal nodes) or word_ids (leaf nodes).

// The aforementioned TreeProto is internally translated into a list of word_ids
// tagged with a list of NodeProtos that lie in the path from the root to that
// word_id using hsm_util.create_hierarchy(tree_proto).
// Specifically, HierarchyProto contains a list of PathProtos. Each PathProto
// belongs to a word_id and contains a list of PathNodeProtos. Each
// PathNodeProto contains information about the number of children the node has
// (length), the index of the child node that lies in the path from root to
// word_id (target) and a cumulative sum of children nodes (index; this acts as
// the weight parameter matrix offset).

// Each node in the hierarchy contains links to either leaf nodes or more
// non-terminal nodes. The message is self-recursive through `children`.
message NodeProto {
  // Links to non-terminal (internal) children nodes.
  repeated NodeProto children = 1;
  // Links to terminal (leaf) nodes, given as word ids.
  repeated int32 word_ids = 2;
  // NOTE(review): not described in this file. Presumably this node's offset
  // into the weight parameter matrix -- confirm against the code that fills it.
  optional int32 offset = 3;
  // NOTE(review): presumably a human-readable label for the node -- confirm.
  optional string name = 4;
  // NOTE(review): semantics and expected length are not described in this
  // file -- confirm with the producers/consumers of this field.
  repeated float scores = 5;
}

// Protobuf format to accept the hierarchy for the hierarchical softmax
// operator. TreeProto points to the root node.
message TreeProto {
  // Root of the word hierarchy; the rest of the tree is reached through the
  // root's `children` and `word_ids`.
  optional NodeProto root_node = 1;
}

// Internal Protobuf format which represents the path in the tree hierarchy for
// each word in the vocabulary.
message HierarchyProto {
  // NOTE(review): not described in this file. Presumably the total extent of
  // the parameter matrix addressed by PathNodeProto.index -- confirm.
  optional int32 size = 1;
  // List of paths; each PathProto belongs to one word_id.
  repeated PathProto paths = 2;
}

// Each PathProto belongs to a word and is an array of nodes in the
// path from the root to the leaf (which is the word itself) in the tree.
message PathProto {
  // The word (leaf) this path belongs to.
  optional int32 word_id = 1;
  // Nodes on the path from the root to the word.
  // NOTE(review): presumably ordered root-first -- confirm.
  repeated PathNodeProto path_nodes = 2;
}

// Represents a node in the path from the root node all the way down to the
// word (leaf).
message PathNodeProto {
  // Parameter matrix offset for this node (a cumulative sum of children
  // nodes).
  optional int32 index = 1;
  // Number of children this node has.
  optional int32 length = 2;
  // Index of the next node in the path, i.e. the child of this node that lies
  // on the path from the root to the word.
  optional int32 target = 3;
}