native/utils/grammar/rules-utils.cc - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "utils/grammar/rules-utils.h"

 namespace libtextclassifier3::grammar {

 std::vector<std::vector<Locale>> ParseRulesLocales(const RulesSet* rules) {
   if (rules == nullptr || rules->rules() == nullptr) {
     return {};
   }
   std::vector<std::vector<Locale>> locales(rules->rules()->size());
   for (int i = 0; i < rules->rules()->size(); i++) {
     const grammar::RulesSet_::Rules* rules_shard = rules->rules()->Get(i);
     if (rules_shard->locale() == nullptr) {
       continue;
     }
     for (const LanguageTag* tag : *rules_shard->locale()) {
       locales[i].push_back(Locale::FromLanguageTag(tag));
     }
   }
   return locales;
 }

 // Selects the rules shards of `rules` that apply to the given `locales`.
 //
 // `shard_locales[i]` is the locale list of shard `i`; it is expected to be
 // index-aligned with the shards of `rules` (the shape that ParseRulesLocales
 // returns). Shard `i` is selected when its locale list is empty, or when
 // Locale::IsAnyLocaleSupported accepts `locales` against that list (with a
 // default of false).
 //
 // Returns the selected shards in shard order. Returns an empty vector when
 // `rules` is null or has no shards vector.
 std::vector<const grammar::RulesSet_::Rules*> SelectLocaleMatchingShards(
     const RulesSet* rules,
     const std::vector<std::vector<Locale>>& shard_locales,
     const std::vector<Locale>& locales) {
   std::vector<const grammar::RulesSet_::Rules*> shards;
   // Same null handling as ParseRulesLocales: a missing rules set simply has
   // no shards to select.
   if (rules == nullptr || rules->rules() == nullptr) {
     return shards;
   }
   const auto* all_shards = rules->rules();
   // Bound the index by both containers so that a `shard_locales` vector that
   // is longer than the actual shard list can never index out of range.
   for (int i = 0; i < shard_locales.size() && i < all_shards->size(); i++) {
     if (shard_locales[i].empty() ||
         Locale::IsAnyLocaleSupported(locales,
                                      /*supported_locales=*/shard_locales[i],
                                      /*default_value=*/false)) {
       shards.push_back(all_shards->Get(i));
     }
   }
   return shards;
 }

 std::vector<Derivation> DeduplicateDerivations(
     const std::vector<Derivation>& derivations) {
   std::vector<Derivation> sorted_candidates = derivations;
   std::stable_sort(
       sorted_candidates.begin(), sorted_candidates.end(),
       [](const Derivation& a, const Derivation& b) {
         // Sort by id.
         if (a.rule_id != b.rule_id) {
           return a.rule_id < b.rule_id;
         }

         // Sort by increasing start.
         if (a.match->codepoint_span.first != b.match->codepoint_span.first) {
           return a.match->codepoint_span.first < b.match->codepoint_span.first;
         }

         // Sort by decreasing end.
         return a.match->codepoint_span.second > b.match->codepoint_span.second;
       });

   // Deduplicate by overlap.
   std::vector<Derivation> result;
   for (int i = 0; i < sorted_candidates.size(); i++) {
     const Derivation& candidate = sorted_candidates[i];
     bool eliminated = false;

     // Due to the sorting above, the candidate can only be completely
     // intersected by a match before it in the sorted order.
     for (int j = i - 1; j >= 0; j--) {
       if (sorted_candidates[j].rule_id != candidate.rule_id) {
         break;
       }
       if (sorted_candidates[j].match->codepoint_span.first <=
               candidate.match->codepoint_span.first &&
           sorted_candidates[j].match->codepoint_span.second >=
               candidate.match->codepoint_span.second) {
         eliminated = true;
         break;
       }
     }

     if (!eliminated) {
       result.push_back(candidate);
     }
   }
   return result;
 }

 // Checks the assertion nodes visited while traversing the tree at `match`.
 //
 // Returns false as soon as a negative assertion match has been visited and
 // true otherwise; positive assertions count as fulfilled by definition.
 bool VerifyAssertions(const Match* match) {
   bool all_hold = true;
   grammar::Traverse(match, [&all_hold](const Match* node) {
     const bool is_negative_assertion =
         node->type == Match::kAssertionMatch &&
         static_cast<const AssertionMatch*>(node)->negative;
     if (is_negative_assertion) {
       all_hold = false;
     }
     // The callback reports whether all checks so far passed.
     // NOTE(review): presumably Traverse uses this to decide whether to keep
     // descending - confirm against its declaration.
     return all_hold;
   });
   return all_hold;
 }

 }  // namespace libtextclassifier3::grammar
	/*
	* Copyright (C) 2018 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "utils/grammar/rules-utils.h"

	namespace libtextclassifier3::grammar {

	// Parses the locale tags attached to every rules shard of `rules`.
	//
	// Returns one entry per shard, in shard order; entry `i` holds the locales
	// converted from the language tags of shard `i` and is left empty when that
	// shard carries no locale list. Returns an empty vector when `rules` is null
	// or has no shards vector.
	std::vector<std::vector<Locale>> ParseRulesLocales(const RulesSet* rules) {
	  if (rules == nullptr || rules->rules() == nullptr) {
	    return {};
	  }
	  // Pre-size the result so that shards without locales still get an (empty)
	  // slot at their own index.
	  std::vector<std::vector<Locale>> locales(rules->rules()->size());
	  for (int i = 0; i < rules->rules()->size(); i++) {
	    const grammar::RulesSet_::Rules* rules_shard = rules->rules()->Get(i);
	    if (rules_shard->locale() == nullptr) {
	      continue;
	    }
	    for (const LanguageTag* tag : *rules_shard->locale()) {
	      locales[i].push_back(Locale::FromLanguageTag(tag));
	    }
	  }
	  return locales;
	}

	// Selects the rules shards of `rules` that apply to the given `locales`.
	//
	// `shard_locales[i]` is the locale list of shard `i`; it is expected to be
	// index-aligned with the shards of `rules` (the shape that ParseRulesLocales
	// returns). Shard `i` is selected when its locale list is empty, or when
	// Locale::IsAnyLocaleSupported accepts `locales` against that list (with a
	// default of false).
	//
	// Returns the selected shards in shard order. Returns an empty vector when
	// `rules` is null or has no shards vector.
	std::vector<const grammar::RulesSet_::Rules*> SelectLocaleMatchingShards(
	    const RulesSet* rules,
	    const std::vector<std::vector<Locale>>& shard_locales,
	    const std::vector<Locale>& locales) {
	  std::vector<const grammar::RulesSet_::Rules*> shards;
	  // Same null handling as ParseRulesLocales: a missing rules set simply has
	  // no shards to select.
	  if (rules == nullptr || rules->rules() == nullptr) {
	    return shards;
	  }
	  const auto* all_shards = rules->rules();
	  // Bound the index by both containers so that a `shard_locales` vector that
	  // is longer than the actual shard list can never index out of range.
	  for (int i = 0; i < shard_locales.size() && i < all_shards->size(); i++) {
	    if (shard_locales[i].empty() ||
	        Locale::IsAnyLocaleSupported(locales,
	                                     /*supported_locales=*/shard_locales[i],
	                                     /*default_value=*/false)) {
	      shards.push_back(all_shards->Get(i));
	    }
	  }
	  return shards;
	}

	std::vector<Derivation> DeduplicateDerivations(
	const std::vector<Derivation>& derivations) {
	std::vector<Derivation> sorted_candidates = derivations;
	std::stable_sort(
	sorted_candidates.begin(), sorted_candidates.end(),
	[](const Derivation& a, const Derivation& b) {
	// Sort by id.
	if (a.rule_id != b.rule_id) {
	return a.rule_id < b.rule_id;
	}

	// Sort by increasing start.
	if (a.match->codepoint_span.first != b.match->codepoint_span.first) {
	return a.match->codepoint_span.first < b.match->codepoint_span.first;
	}

	// Sort by decreasing end.
	return a.match->codepoint_span.second > b.match->codepoint_span.second;
	});

	// Deduplicate by overlap.
	std::vector<Derivation> result;
	for (int i = 0; i < sorted_candidates.size(); i++) {
	const Derivation& candidate = sorted_candidates[i];
	bool eliminated = false;

	// Due to the sorting above, the candidate can only be completely
	// intersected by a match before it in the sorted order.
	for (int j = i - 1; j >= 0; j--) {
	if (sorted_candidates[j].rule_id != candidate.rule_id) {
	break;
	}
	if (sorted_candidates[j].match->codepoint_span.first <=
	candidate.match->codepoint_span.first &&
	sorted_candidates[j].match->codepoint_span.second >=
	candidate.match->codepoint_span.second) {
	eliminated = true;
	break;
	}
	}

	if (!eliminated) {
	result.push_back(candidate);
	}
	}
	return result;
	}

	// Checks the assertion nodes visited while traversing the tree at `match`.
	//
	// Returns false as soon as a negative assertion match has been visited and
	// true otherwise; positive assertions count as fulfilled by definition.
	bool VerifyAssertions(const Match* match) {
	  bool all_hold = true;
	  grammar::Traverse(match, [&all_hold](const Match* node) {
	    const bool is_negative_assertion =
	        node->type == Match::kAssertionMatch &&
	        static_cast<const AssertionMatch*>(node)->negative;
	    if (is_negative_assertion) {
	      all_hold = false;
	    }
	    // The callback reports whether all checks so far passed.
	    // NOTE(review): presumably Traverse uses this to decide whether to keep
	    // descending - confirm against its declaration.
	    return all_hold;
	  });
	  return all_hold;
	}

	} // namespace libtextclassifier3::grammar