google/assistant/embedded/v1alpha2/embedded_assistant.proto - platform/external/googleapis - Git at Google

 // Copyright 2018 Google Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 syntax = "proto3";

 package google.assistant.embedded.v1alpha2;

 import "google/api/annotations.proto";
 import "google/type/latlng.proto";

 option go_package = "google.golang.org/genproto/googleapis/assistant/embedded/v1alpha2;embedded";
 option java_multiple_files = true;
 option java_outer_classname = "AssistantProto";
 option java_package = "com.google.assistant.embedded.v1alpha2";
 option objc_class_prefix = "ASTSDK";

 // Service that implements the Google Assistant API.
 service EmbeddedAssistant {
   // Initiates or continues a conversation with the embedded Assistant Service.
   // Each call performs one round-trip, sending an audio request to the service
   // and receiving the audio response. Uses bidirectional streaming to receive
   // results, such as the `END_OF_UTTERANCE` event, while sending audio.
   //
   // A conversation is one or more gRPC connections, each consisting of several
   // streamed requests and responses.
   // For example, the user says *Add to my shopping list* and the Assistant
   // responds *What do you want to add?*. The sequence of streamed requests and
   // responses in the first gRPC message could be:
   //
   // *   AssistRequest.config
   // *   AssistRequest.audio_in
   // *   AssistRequest.audio_in
   // *   AssistRequest.audio_in
   // *   AssistRequest.audio_in
   // *   AssistResponse.event_type.END_OF_UTTERANCE
   // *   AssistResponse.speech_results.transcript "add to my shopping list"
   // *   AssistResponse.dialog_state_out.microphone_mode.DIALOG_FOLLOW_ON
   // *   AssistResponse.audio_out
   // *   AssistResponse.audio_out
   // *   AssistResponse.audio_out
   //
   //
   // The user then says *bagels* and the Assistant responds
   // *OK, I've added bagels to your shopping list*. This is sent as another gRPC
   // connection call to the `Assist` method, again with streamed requests and
   // responses, such as:
   //
   // *   AssistRequest.config
   // *   AssistRequest.audio_in
   // *   AssistRequest.audio_in
   // *   AssistRequest.audio_in
   // *   AssistResponse.event_type.END_OF_UTTERANCE
   // *   AssistResponse.dialog_state_out.microphone_mode.CLOSE_MICROPHONE
   // *   AssistResponse.audio_out
   // *   AssistResponse.audio_out
   // *   AssistResponse.audio_out
   // *   AssistResponse.audio_out
   //
   // Although the precise order of responses is not guaranteed, sequential
   // `AssistResponse.audio_out` messages will always contain sequential portions
   // of audio.
   rpc Assist(stream AssistRequest) returns (stream AssistResponse);
 }

 // The top-level message sent by the client. Clients must send at least two, and
 // typically numerous `AssistRequest` messages. The first message must
 // contain a `config` message and must not contain `audio_in` data. All
 // subsequent messages must contain `audio_in` data and must not contain a
 // `config` message.
 message AssistRequest {
   // Exactly one of these fields must be specified in each `AssistRequest`.
   oneof type {
     // The `config` message provides information to the recognizer that
     // specifies how to process the request.
     // The first `AssistRequest` message must contain a `config` message.
     AssistConfig config = 1;

     // The audio data to be recognized. Sequential chunks of audio data are sent
     // in sequential `AssistRequest` messages. The first `AssistRequest`
     // message must not contain `audio_in` data and all subsequent
     // `AssistRequest` messages must contain `audio_in` data. The audio bytes
     // must be encoded as specified in `AudioInConfig`.
     // Audio must be sent at approximately real-time (16000 samples per second).
     // An error will be returned if audio is sent significantly faster or
     // slower.
     bytes audio_in = 2;
   }
 }

 // The top-level message received by the client. A series of one or more
 // `AssistResponse` messages are streamed back to the client.
 message AssistResponse {
   // Indicates the type of event.
   enum EventType {
     // No event specified.
     EVENT_TYPE_UNSPECIFIED = 0;

     // This event indicates that the server has detected the end of the user's
     // speech utterance and expects no additional speech. Therefore, the server
     // will not process additional audio (although it may subsequently return
     // additional results). The client should stop sending additional audio
     // data, half-close the gRPC connection, and wait for any additional results
     // until the server closes the gRPC connection.
     END_OF_UTTERANCE = 1;
   }

   // *Output-only* Indicates the type of event.
   EventType event_type = 1;

   // *Output-only* The audio containing the Assistant's response to the query.
   AudioOut audio_out = 3;

   // *Output-only* Contains the Assistant's visual response to the query.
   ScreenOut screen_out = 4;

   // *Output-only* Contains the action triggered by the query with the
   // appropriate payloads and semantic parsing.
   DeviceAction device_action = 6;

   // *Output-only* This repeated list contains zero or more speech recognition
   // results that correspond to consecutive portions of the audio currently
   // being processed, starting with the portion corresponding to the earliest
   // audio (and most stable portion) to the portion corresponding to the most
   // recent audio. The strings can be concatenated to view the full
   // in-progress response. When the speech recognition completes, this list
   // will contain one item with `stability` of `1.0`.
   repeated SpeechRecognitionResult speech_results = 2;

   // *Output-only* Contains output related to the user's query.
   DialogStateOut dialog_state_out = 5;

   // *Output-only* Debugging info for developer. Only returned if request set
   // `return_debug_info` to true.
   DebugInfo debug_info = 8;
 }

 // Debug info for developer. Only returned if request set `return_debug_info`
 // to true.
 message DebugInfo {
   // The original JSON response from an Action-on-Google agent to Google server.
   // See
   // https://developers.google.com/actions/reference/rest/Shared.Types/AppResponse.
   // It will only be populated if the request maker owns the AoG project and the
   // AoG project is in preview mode.
   string aog_agent_to_assistant_json = 1;
 }

 // Specifies how to process the `AssistRequest` messages.
 message AssistConfig {
   oneof type {
     // Specifies how to process the subsequent incoming audio. Required if
     // [AssistRequest.audio_in][google.assistant.embedded.v1alpha2.AssistRequest.audio_in]
     // bytes will be provided in subsequent requests.
     AudioInConfig audio_in_config = 1;

     // The text input to be sent to the Assistant. This can be populated from a
     // text interface if audio input is not available.
     string text_query = 6;
   }

   // *Required* Specifies how to format the audio that will be returned.
   AudioOutConfig audio_out_config = 2;

   // *Optional* Specifies the desired format to use when server returns a
   // visual screen response.
   ScreenOutConfig screen_out_config = 8;

   // *Required* Represents the current dialog state.
   DialogStateIn dialog_state_in = 3;

   // Device configuration that uniquely identifies a specific device.
   DeviceConfig device_config = 4;

   // *Optional* Debugging parameters for the whole `Assist` RPC.
   DebugConfig debug_config = 5;
 }

 // Specifies how to process the `audio_in` data that will be provided in
 // subsequent requests. For recommended settings, see the Google Assistant SDK
 // [best
 // practices](https://developers.google.com/assistant/sdk/guides/service/python/best-practices/audio).
 message AudioInConfig {
   // Audio encoding of the data sent in the audio message.
   // Audio must be one-channel (mono).
   enum Encoding {
     // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
     ENCODING_UNSPECIFIED = 0;

     // Uncompressed 16-bit signed little-endian samples (Linear PCM).
     // This encoding includes no header, only the raw audio bytes.
     LINEAR16 = 1;

     // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
     // Codec) is the recommended encoding because it is
     // lossless--therefore recognition is not compromised--and
     // requires only about half the bandwidth of `LINEAR16`. This encoding
     // includes the `FLAC` stream header followed by audio data. It supports
     // 16-bit and 24-bit samples, however, not all fields in `STREAMINFO` are
     // supported.
     FLAC = 2;
   }

   // *Required* Encoding of audio data sent in all `audio_in` messages.
   Encoding encoding = 1;

   // *Required* Sample rate (in Hertz) of the audio data sent in all `audio_in`
   // messages. Valid values are from 16000-24000, but 16000 is optimal.
   // For best results, set the sampling rate of the audio source to 16000 Hz.
   // If that's not possible, use the native sample rate of the audio source
   // (instead of re-sampling).
   int32 sample_rate_hertz = 2;
 }

 // Specifies the desired format for the server to use when it returns
 // `audio_out` messages.
 message AudioOutConfig {
   // Audio encoding of the data returned in the audio message. All encodings are
   // raw audio bytes with no header, except as indicated below.
   enum Encoding {
     // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
     ENCODING_UNSPECIFIED = 0;

     // Uncompressed 16-bit signed little-endian samples (Linear PCM).
     LINEAR16 = 1;

     // MP3 audio encoding. The sample rate is encoded in the payload.
     MP3 = 2;

     // Opus-encoded audio wrapped in an ogg container. The result will be a
     // file which can be played natively on Android and in some browsers (such
     // as Chrome). The quality of the encoding is considerably higher than MP3
     // while using the same bitrate. The sample rate is encoded in the payload.
     OPUS_IN_OGG = 3;
   }

   // *Required* The encoding of audio data to be returned in all `audio_out`
   // messages.
   Encoding encoding = 1;

   // *Required* The sample rate in Hertz of the audio data returned in
   // `audio_out` messages. Valid values are: 16000-24000.
   int32 sample_rate_hertz = 2;

   // *Required* Current volume setting of the device's audio output.
   // Valid values are 1 to 100 (corresponding to 1% to 100%).
   int32 volume_percentage = 3;
 }

 // Specifies the desired format for the server to use when it returns
 // `screen_out` response.
 message ScreenOutConfig {
   // Possible modes for visual screen-output on the device.
   enum ScreenMode {
     // No video mode specified.
     // The Assistant may respond as if in `OFF` mode.
     SCREEN_MODE_UNSPECIFIED = 0;

     // Screen is off (or has brightness or other settings set so low it is
     // not visible). The Assistant will typically not return a screen response
     // in this mode.
     OFF = 1;

     // The Assistant will typically return a partial-screen response in this
     // mode.
     PLAYING = 3;
   }

   // Current visual screen-mode for the device while issuing the query.
   ScreenMode screen_mode = 1;
 }

 // Provides information about the current dialog state.
 message DialogStateIn {
   // *Required* This field must always be set to the
   // [DialogStateOut.conversation_state][google.assistant.embedded.v1alpha2.DialogStateOut.conversation_state]
   // value that was returned in the prior `Assist` RPC. It should only be
   // omitted (field not set) if there was no prior `Assist` RPC because this is
   // the first `Assist` RPC made by this device after it was first setup and/or
   // a factory-default reset.
   bytes conversation_state = 1;

   // *Required* Language of the request in
   // [IETF BCP 47 syntax](https://tools.ietf.org/html/bcp47) (for example,
   // "en-US"). See [Language
   // Support](https://developers.google.com/assistant/sdk/reference/rpc/languages)
   // for more information. If you have selected a language for this `device_id`
   // using the
   // [Settings](https://developers.google.com/assistant/sdk/reference/assistant-app/assistant-settings)
   // menu in your phone's Google Assistant app, that selection will override
   // this value.
   string language_code = 2;

   // *Optional* Location of the device where the query originated.
   DeviceLocation device_location = 5;

   // *Optional* If true, the server will treat the request as a new conversation
   // and not use state from the prior request. Set this field to true when the
   // conversation should be restarted, such as after a device reboot, or after a
   // significant lapse of time since the prior query.
   bool is_new_conversation = 7;
 }

 // *Required* Fields that identify the device to the Assistant.
 //
 // See also:
 //
 // *   [Register a Device - REST
 // API](https://developers.google.com/assistant/sdk/reference/device-registration/register-device-manual)
 // *   [Device Model and Instance
 // Schemas](https://developers.google.com/assistant/sdk/reference/device-registration/model-and-instance-schemas)
 // *   [Device
 // Proto](https://developers.google.com/assistant/sdk/reference/rpc/google.assistant.devices.v1alpha2#device)
 message DeviceConfig {
   // *Required* Unique identifier for the device. The id length must be 128
   // characters or less. Example: DBCDW098234. This MUST match the device_id
   // returned from device registration. This device_id is used to match against
   // the user's registered devices to lookup the supported traits and
   // capabilities of this device. This information should not change across
   // device reboots. However, it should not be saved across
   // factory-default resets.
   string device_id = 1;

   // *Required* Unique identifier for the device model. The combination of
   // device_model_id and device_id must have been previously associated through
   // device registration.
   string device_model_id = 3;
 }

 // The audio containing the Assistant's response to the query. Sequential chunks
 // of audio data are received in sequential `AssistResponse` messages.
 message AudioOut {
   // *Output-only* The audio data containing the Assistant's response to the
   // query. Sequential chunks of audio data are received in sequential
   // `AssistResponse` messages.
   bytes audio_data = 1;
 }

 // The Assistant's visual output response to query. Enabled by
 // `screen_out_config`.
 message ScreenOut {
   // Possible formats of the screen data.
   enum Format {
     // No format specified.
     FORMAT_UNSPECIFIED = 0;

     // Data will contain a fully-formed HTML5 layout encoded in UTF-8, e.g.
     // `<html><body><div>...</div></body></html>`. It is intended to be rendered
     // along with the audio response. Note that HTML5 doctype should be included
     // in the actual HTML data.
     HTML = 1;
   }

   // *Output-only* The format of the provided screen data.
   Format format = 1;

   // *Output-only* The raw screen data to be displayed as the result of the
   // Assistant query.
   bytes data = 2;
 }

 // The response returned to the device if the user has triggered a Device
 // Action. For example, a device which supports the query *Turn on the light*
 // would receive a `DeviceAction` with a JSON payload containing the semantics
 // of the request.
 message DeviceAction {
   // JSON containing the device command response generated from the triggered
   // Device Action grammar. The format is given by the
   // `action.devices.EXECUTE` intent for a given
   // [trait](https://developers.google.com/assistant/sdk/reference/traits/).
   string device_request_json = 1;
 }

 // The estimated transcription of a phrase the user has spoken. This could be
 // a single segment or the full guess of the user's spoken query.
 message SpeechRecognitionResult {
   // *Output-only* Transcript text representing the words that the user spoke.
   string transcript = 1;

   // *Output-only* An estimate of the likelihood that the Assistant will not
   // change its guess about this result. Values range from 0.0 (completely
   // unstable) to 1.0 (completely stable and final). The default of 0.0 is a
   // sentinel value indicating `stability` was not set.
   float stability = 2;
 }

 // The dialog state resulting from the user's query. Multiple of these messages
 // may be received.
 message DialogStateOut {
   // Possible states of the microphone after a `Assist` RPC completes.
   enum MicrophoneMode {
     // No mode specified.
     MICROPHONE_MODE_UNSPECIFIED = 0;

     // The service is not expecting a follow-on question from the user.
     // The microphone should remain off until the user re-activates it.
     CLOSE_MICROPHONE = 1;

     // The service is expecting a follow-on question from the user. The
     // microphone should be re-opened when the `AudioOut` playback completes
     // (by starting a new `Assist` RPC call to send the new audio).
     DIALOG_FOLLOW_ON = 2;
   }

   // *Output-only* Supplemental display text from the Assistant. This could be
   // the same as the speech spoken in `AssistResponse.audio_out` or it could
   // be some additional information which aids the user's understanding.
   string supplemental_display_text = 1;

   // *Output-only* State information for the subsequent `Assist` RPC. This
   // value should be saved in the client and returned in the
   // [`DialogStateIn.conversation_state`](#dialogstatein) field with the next
   // `Assist` RPC. (The client does not need to interpret or otherwise use this
   // value.) This information should be saved across device reboots. However,
   // this value should be cleared (not saved in the client) during a
   // factory-default reset.
   bytes conversation_state = 2;

   // *Output-only* Specifies the mode of the microphone after this `Assist`
   // RPC is processed.
   MicrophoneMode microphone_mode = 3;

   // *Output-only* Updated volume level. The value will be 0 or omitted
   // (indicating no change) unless a voice command such as *Increase the volume*
   // or *Set volume level 4* was recognized, in which case the value will be
   // between 1 and 100 (corresponding to the new volume level of 1% to 100%).
   // Typically, a client should use this volume level when playing the
   // `audio_out` data, and retain this value as the current volume level and
   // supply it in the `AudioOutConfig` of the next `AssistRequest`. (Some
   // clients may also implement other ways to allow the current volume level to
   // be changed, for example, by providing a knob that the user can turn.)
   int32 volume_percentage = 4;
 }

 // Debugging parameters for the current request.
 message DebugConfig {
   // When this field is set to true, the `debug_info` field in `AssistResponse`
   // may be populated. However it will significantly increase latency of
   // responses. Do not set this field true in production code.
   bool return_debug_info = 6;
 }

 // There are three sources of locations. They are used with this precedence:
 //
 // 1. This `DeviceLocation`, which is primarily used for mobile devices with
 //    GPS .
 // 2. Location specified by the user during device setup; this is per-user, per
 //    device. This location is used if `DeviceLocation` is not specified.
 // 3. Inferred location based on IP address. This is used only if neither of the
 //    above are specified.
 message DeviceLocation {
   oneof type {
     // Latitude and longitude of device.
     google.type.LatLng coordinates = 1;
   }
 }
	// Copyright 2018 Google Inc.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	syntax = "proto3";

	package google.assistant.embedded.v1alpha2;

	import "google/api/annotations.proto";
	import "google/type/latlng.proto";

	option go_package = "google.golang.org/genproto/googleapis/assistant/embedded/v1alpha2;embedded";
	option java_multiple_files = true;
	option java_outer_classname = "AssistantProto";
	option java_package = "com.google.assistant.embedded.v1alpha2";
	option objc_class_prefix = "ASTSDK";

	// Service that implements the Google Assistant API.
	service EmbeddedAssistant {
	// Initiates or continues a conversation with the embedded Assistant Service.
	// Each call performs one round-trip, sending an audio request to the service
	// and receiving the audio response. Uses bidirectional streaming to receive
	// results, such as the `END_OF_UTTERANCE` event, while sending audio.
	//
	// A conversation is one or more gRPC connections, each consisting of several
	// streamed requests and responses.
	// For example, the user says Add to my shopping list and the Assistant
	// responds What do you want to add?. The sequence of streamed requests and
	// responses in the first gRPC message could be:
	//
	// * AssistRequest.config
	// * AssistRequest.audio_in
	// * AssistRequest.audio_in
	// * AssistRequest.audio_in
	// * AssistRequest.audio_in
	// * AssistResponse.event_type.END_OF_UTTERANCE
	// * AssistResponse.speech_results.transcript "add to my shopping list"
	// * AssistResponse.dialog_state_out.microphone_mode.DIALOG_FOLLOW_ON
	// * AssistResponse.audio_out
	// * AssistResponse.audio_out
	// * AssistResponse.audio_out
	//
	//
	// The user then says bagels and the Assistant responds
	// OK, I've added bagels to your shopping list. This is sent as another gRPC
	// connection call to the `Assist` method, again with streamed requests and
	// responses, such as:
	//
	// * AssistRequest.config
	// * AssistRequest.audio_in
	// * AssistRequest.audio_in
	// * AssistRequest.audio_in
	// * AssistResponse.event_type.END_OF_UTTERANCE
	// * AssistResponse.dialog_state_out.microphone_mode.CLOSE_MICROPHONE
	// * AssistResponse.audio_out
	// * AssistResponse.audio_out
	// * AssistResponse.audio_out
	// * AssistResponse.audio_out
	//
	// Although the precise order of responses is not guaranteed, sequential
	// `AssistResponse.audio_out` messages will always contain sequential portions
	// of audio.
	rpc Assist(stream AssistRequest) returns (stream AssistResponse);
	}

	// The top-level message sent by the client. Clients must send at least two, and
	// typically numerous `AssistRequest` messages. The first message must
	// contain a `config` message and must not contain `audio_in` data. All
	// subsequent messages must contain `audio_in` data and must not contain a
	// `config` message.
	message AssistRequest {
	// Exactly one of these fields must be specified in each `AssistRequest`.
	oneof type {
	// The `config` message provides information to the recognizer that
	// specifies how to process the request.
	// The first `AssistRequest` message must contain a `config` message.
	AssistConfig config = 1;

	// The audio data to be recognized. Sequential chunks of audio data are sent
	// in sequential `AssistRequest` messages. The first `AssistRequest`
	// message must not contain `audio_in` data and all subsequent
	// `AssistRequest` messages must contain `audio_in` data. The audio bytes
	// must be encoded as specified in `AudioInConfig`.
	// Audio must be sent at approximately real-time (16000 samples per second).
	// An error will be returned if audio is sent significantly faster or
	// slower.
	bytes audio_in = 2;
	}
	}

	// The top-level message received by the client. A series of one or more
	// `AssistResponse` messages are streamed back to the client.
	message AssistResponse {
	// Indicates the type of event.
	enum EventType {
	// No event specified.
	EVENT_TYPE_UNSPECIFIED = 0;

	// This event indicates that the server has detected the end of the user's
	// speech utterance and expects no additional speech. Therefore, the server
	// will not process additional audio (although it may subsequently return
	// additional results). The client should stop sending additional audio
	// data, half-close the gRPC connection, and wait for any additional results
	// until the server closes the gRPC connection.
	END_OF_UTTERANCE = 1;
	}

	// Output-only Indicates the type of event.
	EventType event_type = 1;

	// Output-only The audio containing the Assistant's response to the query.
	AudioOut audio_out = 3;

	// Output-only Contains the Assistant's visual response to the query.
	ScreenOut screen_out = 4;

	// Output-only Contains the action triggered by the query with the
	// appropriate payloads and semantic parsing.
	DeviceAction device_action = 6;

	// Output-only This repeated list contains zero or more speech recognition
	// results that correspond to consecutive portions of the audio currently
	// being processed, starting with the portion corresponding to the earliest
	// audio (and most stable portion) to the portion corresponding to the most
	// recent audio. The strings can be concatenated to view the full
	// in-progress response. When the speech recognition completes, this list
	// will contain one item with `stability` of `1.0`.
	repeated SpeechRecognitionResult speech_results = 2;

	// Output-only Contains output related to the user's query.
	DialogStateOut dialog_state_out = 5;

	// Output-only Debugging info for developer. Only returned if request set
	// `return_debug_info` to true.
	DebugInfo debug_info = 8;
	}

	// Debug info for developer. Only returned if request set `return_debug_info`
	// to true.
	message DebugInfo {
	// The original JSON response from an Action-on-Google agent to Google server.
	// See
	// https://developers.google.com/actions/reference/rest/Shared.Types/AppResponse.
	// It will only be populated if the request maker owns the AoG project and the
	// AoG project is in preview mode.
	string aog_agent_to_assistant_json = 1;
	}

	// Specifies how to process the `AssistRequest` messages.
	message AssistConfig {
	oneof type {
	// Specifies how to process the subsequent incoming audio. Required if
	// [AssistRequest.audio_in][google.assistant.embedded.v1alpha2.AssistRequest.audio_in]
	// bytes will be provided in subsequent requests.
	AudioInConfig audio_in_config = 1;

	// The text input to be sent to the Assistant. This can be populated from a
	// text interface if audio input is not available.
	string text_query = 6;
	}

	// Required Specifies how to format the audio that will be returned.
	AudioOutConfig audio_out_config = 2;

	// Optional Specifies the desired format to use when server returns a
	// visual screen response.
	ScreenOutConfig screen_out_config = 8;

	// Required Represents the current dialog state.
	DialogStateIn dialog_state_in = 3;

	// Device configuration that uniquely identifies a specific device.
	DeviceConfig device_config = 4;

	// Optional Debugging parameters for the whole `Assist` RPC.
	DebugConfig debug_config = 5;
	}

	// Specifies how to process the `audio_in` data that will be provided in
	// subsequent requests. For recommended settings, see the Google Assistant SDK
	// [best
	// practices](https://developers.google.com/assistant/sdk/guides/service/python/best-practices/audio).
	message AudioInConfig {
	// Audio encoding of the data sent in the audio message.
	// Audio must be one-channel (mono).
	enum Encoding {
	// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
	ENCODING_UNSPECIFIED = 0;

	// Uncompressed 16-bit signed little-endian samples (Linear PCM).
	// This encoding includes no header, only the raw audio bytes.
	LINEAR16 = 1;

	// [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
	// Codec) is the recommended encoding because it is
	// lossless--therefore recognition is not compromised--and
	// requires only about half the bandwidth of `LINEAR16`. This encoding
	// includes the `FLAC` stream header followed by audio data. It supports
	// 16-bit and 24-bit samples, however, not all fields in `STREAMINFO` are
	// supported.
	FLAC = 2;
	}

	// Required Encoding of audio data sent in all `audio_in` messages.
	Encoding encoding = 1;

	// Required Sample rate (in Hertz) of the audio data sent in all `audio_in`
	// messages. Valid values are from 16000-24000, but 16000 is optimal.
	// For best results, set the sampling rate of the audio source to 16000 Hz.
	// If that's not possible, use the native sample rate of the audio source
	// (instead of re-sampling).
	int32 sample_rate_hertz = 2;
	}

	// Specifies the desired format for the server to use when it returns
	// `audio_out` messages.
	message AudioOutConfig {
	// Audio encoding of the data returned in the audio message. All encodings are
	// raw audio bytes with no header, except as indicated below.
	enum Encoding {
	// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][].
	ENCODING_UNSPECIFIED = 0;

	// Uncompressed 16-bit signed little-endian samples (Linear PCM).
	LINEAR16 = 1;

	// MP3 audio encoding. The sample rate is encoded in the payload.
	MP3 = 2;

	// Opus-encoded audio wrapped in an ogg container. The result will be a
	// file which can be played natively on Android and in some browsers (such
	// as Chrome). The quality of the encoding is considerably higher than MP3
	// while using the same bitrate. The sample rate is encoded in the payload.
	OPUS_IN_OGG = 3;
	}

	// Required The encoding of audio data to be returned in all `audio_out`
	// messages.
	Encoding encoding = 1;

	// Required The sample rate in Hertz of the audio data returned in
	// `audio_out` messages. Valid values are: 16000-24000.
	int32 sample_rate_hertz = 2;

	// Required Current volume setting of the device's audio output.
	// Valid values are 1 to 100 (corresponding to 1% to 100%).
	int32 volume_percentage = 3;
	}

	// Specifies the desired format for the server to use when it returns
	// `screen_out` response.
	message ScreenOutConfig {
	// Possible modes for visual screen-output on the device.
	enum ScreenMode {
	// No video mode specified.
	// The Assistant may respond as if in `OFF` mode.
	SCREEN_MODE_UNSPECIFIED = 0;

	// Screen is off (or has brightness or other settings set so low it is
	// not visible). The Assistant will typically not return a screen response
	// in this mode.
	OFF = 1;

	// The Assistant will typically return a partial-screen response in this
	// mode.
	PLAYING = 3;
	}

	// Current visual screen-mode for the device while issuing the query.
	ScreenMode screen_mode = 1;
	}

	// Provides information about the current dialog state.
	message DialogStateIn {
	// Required This field must always be set to the
	// [DialogStateOut.conversation_state][google.assistant.embedded.v1alpha2.DialogStateOut.conversation_state]
	// value that was returned in the prior `Assist` RPC. It should only be
	// omitted (field not set) if there was no prior `Assist` RPC because this is
	// the first `Assist` RPC made by this device after it was first setup and/or
	// a factory-default reset.
	bytes conversation_state = 1;

	// Required Language of the request in
	// [IETF BCP 47 syntax](https://tools.ietf.org/html/bcp47) (for example,
	// "en-US"). See [Language
	// Support](https://developers.google.com/assistant/sdk/reference/rpc/languages)
	// for more information. If you have selected a language for this `device_id`
	// using the
	// [Settings](https://developers.google.com/assistant/sdk/reference/assistant-app/assistant-settings)
	// menu in your phone's Google Assistant app, that selection will override
	// this value.
	string language_code = 2;

	// Optional Location of the device where the query originated.
	DeviceLocation device_location = 5;

	// Optional If true, the server will treat the request as a new conversation
	// and not use state from the prior request. Set this field to true when the
	// conversation should be restarted, such as after a device reboot, or after a
	// significant lapse of time since the prior query.
	bool is_new_conversation = 7;
	}

	// Required Fields that identify the device to the Assistant.
	//
	// See also:
	//
	// * [Register a Device - REST
	// API](https://developers.google.com/assistant/sdk/reference/device-registration/register-device-manual)
	// * [Device Model and Instance
	// Schemas](https://developers.google.com/assistant/sdk/reference/device-registration/model-and-instance-schemas)
	// * [Device
	// Proto](https://developers.google.com/assistant/sdk/reference/rpc/google.assistant.devices.v1alpha2#device)
	message DeviceConfig {
	// Required Unique identifier for the device. The id length must be 128
	// characters or less. Example: DBCDW098234. This MUST match the device_id
	// returned from device registration. This device_id is used to match against
	// the user's registered devices to lookup the supported traits and
	// capabilities of this device. This information should not change across
	// device reboots. However, it should not be saved across
	// factory-default resets.
	string device_id = 1;

	// Required Unique identifier for the device model. The combination of
	// device_model_id and device_id must have been previously associated through
	// device registration.
	string device_model_id = 3;
	}

	// The audio containing the Assistant's response to the query. Sequential chunks
	// of audio data are received in sequential `AssistResponse` messages.
	message AudioOut {
	// Output-only The audio data containing the Assistant's response to the
	// query. Sequential chunks of audio data are received in sequential
	// `AssistResponse` messages.
	bytes audio_data = 1;
	}

	// The Assistant's visual output response to query. Enabled by
	// `screen_out_config`.
	message ScreenOut {
	// Possible formats of the screen data.
	enum Format {
	// No format specified.
	FORMAT_UNSPECIFIED = 0;

	// Data will contain a fully-formed HTML5 layout encoded in UTF-8, e.g.
	// `<html><body><div>...</div></body></html>`. It is intended to be rendered
	// along with the audio response. Note that HTML5 doctype should be included
	// in the actual HTML data.
	HTML = 1;
	}

	// Output-only The format of the provided screen data.
	Format format = 1;

	// Output-only The raw screen data to be displayed as the result of the
	// Assistant query.
	bytes data = 2;
	}

	// The response returned to the device if the user has triggered a Device
	// Action. For example, a device which supports the query Turn on the light
	// would receive a `DeviceAction` with a JSON payload containing the semantics
	// of the request.
	message DeviceAction {
	// JSON containing the device command response generated from the triggered
	// Device Action grammar. The format is given by the
	// `action.devices.EXECUTE` intent for a given
	// [trait](https://developers.google.com/assistant/sdk/reference/traits/).
	string device_request_json = 1;
	}

	// The estimated transcription of a phrase the user has spoken. This could be
	// a single segment or the full guess of the user's spoken query.
	message SpeechRecognitionResult {
	// Output-only Transcript text representing the words that the user spoke.
	string transcript = 1;

	// Output-only An estimate of the likelihood that the Assistant will not
	// change its guess about this result. Values range from 0.0 (completely
	// unstable) to 1.0 (completely stable and final). The default of 0.0 is a
	// sentinel value indicating `stability` was not set.
	float stability = 2;
	}

	// The dialog state resulting from the user's query. Multiple of these messages
	// may be received.
	message DialogStateOut {
	// Possible states of the microphone after a `Assist` RPC completes.
	enum MicrophoneMode {
	// No mode specified.
	MICROPHONE_MODE_UNSPECIFIED = 0;

	// The service is not expecting a follow-on question from the user.
	// The microphone should remain off until the user re-activates it.
	CLOSE_MICROPHONE = 1;

	// The service is expecting a follow-on question from the user. The
	// microphone should be re-opened when the `AudioOut` playback completes
	// (by starting a new `Assist` RPC call to send the new audio).
	DIALOG_FOLLOW_ON = 2;
	}

	// Output-only Supplemental display text from the Assistant. This could be
	// the same as the speech spoken in `AssistResponse.audio_out` or it could
	// be some additional information which aids the user's understanding.
	string supplemental_display_text = 1;

	// Output-only State information for the subsequent `Assist` RPC. This
	// value should be saved in the client and returned in the
	// [`DialogStateIn.conversation_state`](#dialogstatein) field with the next
	// `Assist` RPC. (The client does not need to interpret or otherwise use this
	// value.) This information should be saved across device reboots. However,
	// this value should be cleared (not saved in the client) during a
	// factory-default reset.
	bytes conversation_state = 2;

	// Output-only Specifies the mode of the microphone after this `Assist`
	// RPC is processed.
	MicrophoneMode microphone_mode = 3;

	// Output-only Updated volume level. The value will be 0 or omitted
	// (indicating no change) unless a voice command such as Increase the volume
	// or Set volume level 4 was recognized, in which case the value will be
	// between 1 and 100 (corresponding to the new volume level of 1% to 100%).
	// Typically, a client should use this volume level when playing the
	// `audio_out` data, and retain this value as the current volume level and
	// supply it in the `AudioOutConfig` of the next `AssistRequest`. (Some
	// clients may also implement other ways to allow the current volume level to
	// be changed, for example, by providing a knob that the user can turn.)
	int32 volume_percentage = 4;
	}

	// Debugging parameters for the current request.
	message DebugConfig {
	// When this field is set to true, the `debug_info` field in `AssistResponse`
	// may be populated. However it will significantly increase latency of
	// responses. Do not set this field true in production code.
	bool return_debug_info = 6;
	}

	// There are three sources of locations. They are used with this precedence:
	//
	// 1. This `DeviceLocation`, which is primarily used for mobile devices with
	// GPS .
	// 2. Location specified by the user during device setup; this is per-user, per
	// device. This location is used if `DeviceLocation` is not specified.
	// 3. Inferred location based on IP address. This is used only if neither of the
	// above are specified.
	message DeviceLocation {
	oneof type {
	// Latitude and longitude of device.
	google.type.LatLng coordinates = 1;
	}
	}