| // fst.cc |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: [email protected] (Michael Riley) |
| // |
| // \file |
| // FST definitions. |
| |
| #include <fst/fst.h> |
| |
| // Include these so they are registered |
| #include <fst/compact-fst.h> |
| #include <fst/const-fst.h> |
| #include <fst/matcher-fst.h> |
| #include <fst/vector-fst.h> |
| #include <fst/edit-fst.h> |
| |
| // FST flag definitions |
| |
| DEFINE_bool(fst_verify_properties, false, |
| "Verify fst properties queried by TestProperties"); |
| |
| DEFINE_string(fst_weight_separator, ",", |
| "Character separator between printed composite weights; " |
| "must be a single character"); |
| |
| DEFINE_string(fst_weight_parentheses, "", |
| "Characters enclosing the first weight of a printed composite " |
| "weight (e.g. pair weight, tuple weight and derived classes) to " |
| "ensure proper I/O of nested composite weights; " |
| "must have size 0 (none) or 2 (open and close parenthesis)"); |
| |
| DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache"); |
| |
| DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL, |
| "Cache byte size that triggers garbage collection"); |
| |
| DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate"); |
| |
| DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file"); |
| DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file"); |
| |
| DEFINE_string(fst_read_mode, "read", |
| "Default file reading mode for mappable files"); |
| |
| namespace fst { |
| |
| // Register VectorFst, ConstFst and EditFst for common arcs types |
| REGISTER_FST(VectorFst, StdArc); |
| REGISTER_FST(VectorFst, LogArc); |
| REGISTER_FST(VectorFst, Log64Arc); |
| REGISTER_FST(ConstFst, StdArc); |
| REGISTER_FST(ConstFst, LogArc); |
| REGISTER_FST(ConstFst, Log64Arc); |
| REGISTER_FST(EditFst, StdArc); |
| REGISTER_FST(EditFst, LogArc); |
| REGISTER_FST(EditFst, Log64Arc); |
| |
| // Register CompactFst for common arcs with the default (uint32) size type |
| static FstRegisterer< |
| CompactFst<StdArc, StringCompactor<StdArc> > > |
| CompactFst_StdArc_StringCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<LogArc, StringCompactor<LogArc> > > |
| CompactFst_LogArc_StringCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<StdArc, WeightedStringCompactor<StdArc> > > |
| CompactFst_StdArc_WeightedStringCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<LogArc, WeightedStringCompactor<LogArc> > > |
| CompactFst_LogArc_WeightedStringCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<StdArc, AcceptorCompactor<StdArc> > > |
| CompactFst_StdArc_AcceptorCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<LogArc, AcceptorCompactor<LogArc> > > |
| CompactFst_LogArc_AcceptorCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<StdArc, UnweightedCompactor<StdArc> > > |
| CompactFst_StdArc_UnweightedCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<LogArc, UnweightedCompactor<LogArc> > > |
| CompactFst_LogArc_UnweightedCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > > |
| CompactFst_StdArc_UnweightedAcceptorCompactor_registerer; |
| static FstRegisterer< |
| CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > > |
| CompactFst_LogArc_UnweightedAcceptorCompactor_registerer; |
| |
| // Fst type definitions for lookahead Fsts. |
| extern const char arc_lookahead_fst_type[] = "arc_lookahead"; |
| extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead"; |
| extern const char olabel_lookahead_fst_type[] = "olabel_lookahead"; |
| |
| // Identifies stream data as an FST (and its endianity) |
| static const int32 kFstMagicNumber = 2125659606; |
| |
| // Check for Fst magic number in stream, to indicate |
| // caller function that the stream content is an Fst header; |
| bool IsFstHeader(istream &strm, const string &source) { |
| int64 pos = strm.tellg(); |
| bool match = true; |
| int32 magic_number = 0; |
| ReadType(strm, &magic_number); |
| if (magic_number != kFstMagicNumber |
| ) { |
| match = false; |
| } |
| strm.seekg(pos); |
| return match; |
| } |
| |
| // Check Fst magic number and read in Fst header. |
| // If rewind = true, reposition stream to before call (if possible). |
| bool FstHeader::Read(istream &strm, const string &source, bool rewind) { |
| int64 pos = 0; |
| if (rewind) pos = strm.tellg(); |
| int32 magic_number = 0; |
| ReadType(strm, &magic_number); |
| if (magic_number != kFstMagicNumber |
| ) { |
| LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source; |
| if (rewind) strm.seekg(pos); |
| return false; |
| } |
| |
| ReadType(strm, &fsttype_); |
| ReadType(strm, &arctype_); |
| ReadType(strm, &version_); |
| ReadType(strm, &flags_); |
| ReadType(strm, &properties_); |
| ReadType(strm, &start_); |
| ReadType(strm, &numstates_); |
| ReadType(strm, &numarcs_); |
| if (!strm) { |
| LOG(ERROR) << "FstHeader::Read: read failed: " << source; |
| return false; |
| } |
| if (rewind) strm.seekg(pos); |
| return true; |
| } |
| |
| // Write Fst magic number and Fst header. |
| bool FstHeader::Write(ostream &strm, const string &source) const { |
| WriteType(strm, kFstMagicNumber); |
| WriteType(strm, fsttype_); |
| WriteType(strm, arctype_); |
| WriteType(strm, version_); |
| WriteType(strm, flags_); |
| WriteType(strm, properties_); |
| WriteType(strm, start_); |
| WriteType(strm, numstates_); |
| WriteType(strm, numarcs_); |
| return true; |
| } |
| |
| FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr, |
| const SymbolTable* isym, const SymbolTable* osym) |
| : source(src), header(hdr), isymbols(isym), osymbols(osym) { |
| mode = ReadMode(FLAGS_fst_read_mode); |
| } |
| |
| FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym, |
| const SymbolTable* osym) |
| : source(src), header(0), isymbols(isym), osymbols(osym) { |
| mode = ReadMode(FLAGS_fst_read_mode); |
| } |
| |
| FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) { |
| if (mode == "read") { |
| return READ; |
| } |
| if (mode == "map") { |
| return MAP; |
| } |
| LOG(ERROR) << "Unknown file read mode " << mode; |
| return READ; |
| } |
| |
| } // namespace fst |