| // Copyright 2018 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| library fuchsia.mediacodec; |
| |
| // Value |
| // |
| // Generic "value" for use within the generic "Parameter" struct. Since this |
| // is a union, exactly one of the members below carries the value. |
| union Value { |
| // Boolean payload. |
| bool bool_value; |
| // Unsigned 64 bit integer payload. |
| uint64 uint64_value; |
| // Signed 64 bit integer payload. |
| int64 int64_value; |
| // Text payload. |
| string string_value; |
| // Raw byte payload. For codec-defined binary config, prefer using |
| // CodecFormatDetails.codec_oob_bytes instead. |
| vector<uint8> bytes_value; |
| }; |
| |
| // Parameter |
| // |
| // Generic parameter, identified by a (scope, name) pair and carrying a |
| // generic Value. Conveyed via CodecFormatDetails.pass_through_parameters. |
| // |
| // We want to minimize use of this generic "Parameter" structure by natively |
| // defining as many codec-specific parameter semantics as we can. |
| // |
| // TODO: When possible, describe the very limited scenarios in which it would |
| // still be reasonable to use a generic Parameter. |
| struct Parameter { |
| // Some indication of the scope of applicability of this Parameter. |
| string scope; |
| // Specific name of this parameter, without the scope prefix. |
| string name; |
| // The particular value of this parameter. |
| Value value; |
| }; |
| |
| // CodecFormatDetails |
| // |
| // The purpose of CodecFormatDetails is to fill in additional details not |
| // conveyed via other means. |
| // |
| // TODO(dustingreen): Where at all possible, definitions for the concepts |
| // within CodecFormatDetails should go in media_types.fidl or another location |
| // that's widely shared. Maybe some encoder settings could remain |
| // codec-specific. |
| // |
| // For decoder input, the format details tend to be fairly sparse, since most |
| // compressed formats tend to be mostly self-describing. |
| // |
| // For decoder output and encoder input, the format details need to include all |
| // the out-of-band information regarding the uncompressed data, which tends not |
| // to be self-describing. |
| // |
| // For encoder output, we also include in CodecFormatDetails some additional |
| // encoding parameters which are needed to configure the encoder. These are not |
| // really needed by any downstream decoder under most circumstances, but these |
| // encoder config settings are legitimate optional format details, so we use |
| // the same overall structure, but name them "Encoder" to make it more obvious |
| // that these are mostly relevant to the encoder. A downstream consumer could |
| // potentially benefit from knowing these settings, but most won't look at them. |
| // |
| // Settings that are completely redundant with the data in the format itself |
| // should not be in a required field here. Some encoder parameters may also be |
| // represented in codec_oob_bytes on the output of an encoder - a downstream |
| // consumer can assume the codec_oob_bytes are correct and not check for whether |
| // encoder settings are present or consistent. |
| // |
| // With some exceptions made for encoder settings on output of an encoder, this |
| // stuff should be limited to things we need to know to properly process the |
| // data which we can't already determine from the data itself, and which isn't |
| // already covered by a format's defined OOB binary config blob, which is |
| // conveyed in codec_oob_bytes. |
| // |
| // TODO(dustingreen): Consider whether to split encoder settings into a |
| // separate struct - the main counter-argument would be if a consumer |
| // immediately downstream of an encoder may have good reason to know encoder |
| // settings to help process the data. For example, the nominal bit-rate. |
| |
| // Compressed formats tend to be much more self-describing than uncompressed |
| // formats. (The "self" in "self-describing" includes the codec_oob_bytes.) |
| // |
| // Most decoders can have CodecFormatDetails.domain null. |
| |
| // AudioCompressedFormat |
| // |
| // Format details relevant to compressed audio, mostly for encoder settings. |
| // |
| // Unless otherwise specified in a comment on a field in this structure, |
| // CodecFormatDetails.domain is null for a decoder. A sub-structure whose name |
| // ends in "Encoder" is for encoder output settings. |
| // |
| // TODO(dustingreen): Consider whether splitting encoder settings out separately |
| // would be better. |
| union AudioCompressedFormat { |
| // For an AAC encoder, this field has the settings the encoder needs which |
| // are specific to AAC encoding. |
| AudioCompressedFormatAacEncoder aac; |
| }; |
| |
| // AudioBitrateMode |
| // |
| // Bitrate mode of a compressed audio stream. See |
| // AudioCompressedFormatAacEncoder.bitrate_mode. |
| // |
| // Used mainly when a client is configuring an encoder's output format. May |
| // also be present in an OnOutputConfig() message from an encoder, but should |
| // not be relied upon to be present by any consumer downstream of an encoder. |
| enum AudioBitrateMode { |
| // No particular mode is specified. |
| UNSPECIFIED = 0; |
| // Constant bitrate. |
| CBR = 1; |
| // Variable bitrate. |
| VBR = 2; |
| }; |
| |
| // AudioCompressedFormatAacEncoder |
| // |
| // Encoder settings for an AAC encoder. The meaning of each field is described |
| // separately for SetOutputConfig() and for OnOutputConfig(). |
| struct AudioCompressedFormatAacEncoder { |
| // bits_per_second |
| // |
| // In SetOutputConfig(): |
| // |
| // If zero, an encoder should generate 256 kbps, and a consumer should not |
| // assume any particular bitrate. |
| // |
| // If not zero, the encoder should not exceed this bitrate. In CBR the |
| // encoder should use the highest available bitrate that doesn't exceed this |
| // value, or if there is no such bitrate, the lowest available bitrate. In |
| // VBR, the encoder should stay at or below this bitrate. |
| // |
| // In VBR it's left up to the encoder to choose a reasonable ratio between |
| // max bits per second and min bits per second, with the aim in VBR being |
| // constant perceived quality. |
| // |
| // In OnOutputConfig(): |
| // |
| // In CBR, the nominal bits per second. In VBR, the nominal max bits per |
| // second. |
| uint32 bits_per_second; |
| |
| // bitrate_mode |
| // |
| // In SetOutputConfig(): |
| // |
| // If UNSPECIFIED, up to the encoder. If CBR or VBR, a hint to the encoder |
| // to use that mode. |
| // |
| // In OnOutputConfig(): |
| // |
| // Actual mode being used. UNSPECIFIED means the source is not specifying |
| // which mode. |
| AudioBitrateMode bitrate_mode; |
| |
| // TODO(dustingreen): AAC profile settings. |
| }; |
| |
| // AudioPcmMode |
| // |
| // Selects how PCM sample values are to be interpreted. Each mode constrains |
| // the bits per sample and/or the channel count, as noted per value below. |
| // |
| // TODO(dustingreen): Keep or discard any non-linear formats for purposes of the |
| // Codec interface? |
| enum AudioPcmMode { |
| // 16 bit signed int linear or 32 bit float linear, for now. |
| // 1-N channels ok, with "A.B" channels designated as A+B channel_count - the |
| // channel map is separately specified. So 5.1 becomes channel_count 6. |
| LINEAR = 0; |
| // G.711 A-law: 8 bit, format-defined waveform semantics. |
| // 1 channel. |
| ALAW = 1; |
| // G.711 mu-law: 8 bit, format-defined waveform semantics. |
| // 1 channel. |
| MULAW = 2; |
| }; |
| |
| // AudioChannelId |
| // |
| // Used in specifying which audio channel is for which speaker location / type. |
| // Values below END_DEFINED are explicitly defined here; values from |
| // EXTENDED_CHANNEL_ID_BASE through MAX are for format-specific or ad-hoc use. |
| // |
| // TODO(dustingreen): Do we need more channel IDs than this? |
| // |
| // TODO(dustingreen): Check with mpuryear@ re. naming consistency for "S" vs. |
| // "R" as we move these to a common definition. Also the ordering of LS/RS vs. |
| // LR/RR - probably LR/RR being first would make more sense re. how channels |
| // get added incrementally, but changing the order would no longer match |
| // Android's ordering. |
| enum AudioChannelId { |
| SKIP = 0; // unused channel |
| LF = 1; // left front |
| RF = 2; // right front |
| CF = 3; // center front |
| LS = 4; // left surround |
| RS = 5; // right surround |
| LFE = 6; // low frequency effects |
| CS = 7; // back surround |
| LR = 8; // left rear |
| RR = 9; // right rear |
| // This is the last explicitly-defined value + 1. This name will be |
| // re-defined in future if we add more defined channel IDs above. |
| END_DEFINED = 10; |
| // This is where format-specific (or ad-hoc) channel ID values should go, to |
| // avoid colliding with any additional values allocated above. The values |
| // here are not guaranteed to avoid collision across different formats. |
| EXTENDED_CHANNEL_ID_BASE = 0x6f000000; |
| // Extended channel IDs should be <= MAX. |
| MAX = 0x7fffffff; |
| }; |
| |
| // PcmFormat |
| // |
| // PCM audio format details. |
| // |
| // TODO(dustingreen): Discuss with mpuryear@ re. where definitions for these |
| // details go and make sure the common details can specify at least this much. |
| struct PcmFormat { |
| // Implicit details: |
| // * For bits_per_sample > 8, host-endian is implied. |
| // * At least for now, for channel_count >= 2, interleaved layout is |
| // implied. |
| |
| // pcm_mode |
| // |
| // How the sample values are to be interpreted. See AudioPcmMode. |
| AudioPcmMode pcm_mode; |
| |
| // bits_per_sample |
| // |
| // A "sample" is for a single channel. |
| // |
| // For example, CD quality is 16. See AudioPcmMode comments, as the mode |
| // constrains this value. |
| uint32 bits_per_sample; |
| |
| // frames_per_second |
| // |
| // A "frame" is one datapoint (one "sample") for each channel. Each channel |
| // is sampled this many times per second. For example, CD quality is 44100. |
| uint32 frames_per_second; |
| |
| // channel_map |
| // |
| // channel_map.size() is the channel count, which is limited to at most 16 |
| // by the bound on this vector. See AudioPcmMode comments, as some modes |
| // constrain the channel count to 1. |
| // |
| // Values from AudioChannelId should be used if they are suitable. |
| // |
| // If a channel has no suitable AudioChannelId, an ad-hoc value can be used in |
| // a range starting from AudioChannelId.EXTENDED_CHANNEL_ID_BASE. |
| vector<AudioChannelId>:16 channel_map; |
| |
| // TODO(dustingreen): Add unsigned 8 bit, float 32 bit, maybe others. FWIW, |
| // AOSP appears to support signed 16 bit, unsigned 8 bit, and float 32 bit |
| // under "Pcm", AFAICT based on OMX_NUMERICALDATATYPE and ACodec.cpp code. |
| }; |
| |
| // AudioUncompressedFormat |
| // |
| // Uncompressed audio format details. PCM is the only variant defined so far. |
| union AudioUncompressedFormat { |
| // PCM audio format details. |
| PcmFormat pcm; |
| }; |
| |
| // AudioFormat |
| // |
| // Audio format details, for either compressed or uncompressed audio. |
| union AudioFormat { |
| // Details for compressed audio (mostly encoder settings). |
| AudioCompressedFormat compressed; |
| // Details for uncompressed audio. |
| AudioUncompressedFormat uncompressed; |
| }; |
| |
| // VideoCompressedFormat |
| // |
| // Compressed video format details. |
| // |
| // Mostly encoder settings will go under here. |
| // |
| // If a compressed video format is missing any fields here other than encoder |
| // settings, it's because it's a good format and is already self-describing |
| // given the mime_type + format-defined codec_oob_bytes as appropriate + |
| // in-band data. |
| union VideoCompressedFormat { |
| // TODO(dustingreen): Any compressed video formats that aren't sufficiently |
| // self-describing to select and create a Codec instance to decode it? |
| |
| // TODO(dustingreen): temp field to make the compiler happy until we have at |
| // least one real field. This placeholder carries no format information. |
| uint32 temp_field_todo_remove; |
| }; |
| |
| // VideoUncompressedFormatSpecificDetails |
| // |
| // Extended format-specific uncompressed video format details. Used as the |
| // type of VideoUncompressedFormat.special_formats. |
| // |
| // TODO(dustingreen): Switch to FIDL table instead. |
| union VideoUncompressedFormatSpecificDetails { |
| // TODO(dustingreen): Which formats that we care about really require special |
| // format-specific details here? |
| |
| // TODO(dustingreen): temp field to make the compiler happy until we have at |
| // least one real field. This placeholder carries no format information. |
| uint32 temp_field_todo_remove; |
| }; |
| |
| // VideoColorSpace |
| // |
| // Color space of video data. Only a placeholder value is defined so far. |
| // |
| // NOTE(review): no field in this file currently uses this enum - confirm |
| // whether it is referenced elsewhere. |
| enum VideoColorSpace { |
| // TODO(dustingreen): add to this list |
| INVALID = 0; |
| }; |
| |
| // VideoUncompressedFormat |
| // |
| // Uncompressed video format details. |
| // |
| // TODO(dustingreen): Integrate with a system-wide structure for this purpose. |
| struct VideoUncompressedFormat { |
| // fourcc |
| // |
| // A human-readable fourcc like RGBA should be 0x41424752 in the fourcc field |
| // (regardless of host endian-ness). Note that the R (first character) of the |
| // fourcc is in the low-order byte of this fourcc field. |
| // |
| // There are some fourcc codes that don't format nicely as a string. While I |
| // don't foresee any use of any of the purely numeric fourcc codes (not |
| // corresponding to packed ascii character values), those would be stored |
| // such that their numeric value has its low-order byte in the low-order |
| // byte of this fourcc value. So a fourcc with "hex value" 0x00000001 would |
| // have the numeric value 1 in this field. |
| // |
| // The endian-ness of fourcc values stored in files or in network packets is |
| // outside the scope of these comments, other than to state that regardless |
| // of the source of the fourcc code and the order that storage / transmission |
| // format stores these bytes, a human-readable fourcc should have its |
| // human-read first ascii character value in the low order byte of this |
| // field. |
| uint32 fourcc; |
| |
| // For formats with different planes having different resolution, this is the |
| // resolution of the highest-resolution plane(s). Else it's the resolution |
| // of all the planes. |
| uint32 primary_width_pixels; |
| uint32 primary_height_pixels; |
| |
| // For formats where the secondary planes are the same resolution, these |
| // fields will be the same as primary_width_pixels and primary_height_pixels. |
| // For formats with smaller secondary resolutions, these indicate that |
| // resolution. |
| uint32 secondary_width_pixels; |
| uint32 secondary_height_pixels; |
| |
| // Planar means the various planes are separately stored in their own chunks |
| // of memory. |
| bool planar; |
| |
| // If a format is swizzled, the swizzling parameters are not directly here. |
| bool swizzled; |
| |
| // Line stride of the primary plane, in bytes. |
| uint32 primary_line_stride_bytes; |
| // Formats with the same stride for all planes will have this field equal to |
| // primary_line_stride_bytes. |
| uint32 secondary_line_stride_bytes; |
| |
| // NOTE(review): the units of the three start offsets below, and what they |
| // are relative to, are not stated here - confirm. |
| // |
| // R or Y |
| uint32 primary_start_offset; |
| // G or U |
| uint32 secondary_start_offset; |
| // B or V |
| uint32 tertiary_start_offset; |
| |
| // NOTE(review): the units of the pixel strides below are not stated here - |
| // confirm. |
| uint32 primary_pixel_stride; |
| // For formats with the same pixel stride for all planes, this field will be |
| // equal to primary_pixel_stride. |
| uint32 secondary_pixel_stride; |
| |
| // These override the primary_width_pixels and primary_height_pixels for |
| // purposes of display (but not for purposes of determining the pixel layout |
| // in memory). These can crop on the right and bottom. These must be <= the |
| // corresponding coded dimension. |
| // |
| // This value must be <= primary_width_pixels. |
| uint32 primary_display_width_pixels; |
| // This value must be <= primary_height_pixels. |
| uint32 primary_display_height_pixels; |
| |
| // The pixel_aspect_ratio_width : pixel_aspect_ratio_height is the pixel |
| // aspect ratio (AKA sample aspect ratio AKA SAR) for the luma (AKA Y) |
| // samples. A pixel_aspect_ratio of 1:1 means square pixels. A |
| // pixel_aspect_ratio of 2:1 would mean pixels that are displayed twice as |
| // wide as they are tall. Codec implementation should ensure these two values |
| // are relatively prime by reducing the fraction (dividing both by GCF) if |
| // necessary. |
| // |
| // When has_pixel_aspect_ratio == false, pixel_aspect_ratio_width and |
| // pixel_aspect_ratio_height will both be 1, but in that case the |
| // pixel_aspect_ratio_width : pixel_aspect_ratio_height of 1:1 is just a very |
| // weak suggestion re. reasonable-ish handling, not in any way authoritative. |
| // In this case (or in any case really) the receiver of this message may have |
| // other OOB means to determine the actual pixel_aspect_ratio. |
| bool has_pixel_aspect_ratio = false; |
| uint32 pixel_aspect_ratio_width = 1; |
| uint32 pixel_aspect_ratio_height = 1; |
| |
| // TODO(dustingreen): Currently this assumes 8 bits per channel, but we'll |
| // need fields to indicate more bits per pixel such as 10 or 12 bits per |
| // pixel. Also, potentially a way to indicate different number of bits per |
| // channel for 565 16 bit RGB + packing details. Also, potentially |
| // endian-ness. |
| // |
| // TODO(dustingreen): Also, an easy way to get a template |
| // VideoUncompressedFormat that's pre-populated with reasonably-plausible |
| // values, based on a fourcc or enum value + maybe primary resolution. |
| |
| // Extended format-specific details. See |
| // VideoUncompressedFormatSpecificDetails. |
| VideoUncompressedFormatSpecificDetails special_formats; |
| }; |
| |
| // VideoFormat |
| // |
| // Video (compressed or uncompressed) format details. |
| union VideoFormat { |
| // Details for compressed video (mostly encoder settings). |
| VideoCompressedFormat compressed; |
| // Details for uncompressed video. |
| VideoUncompressedFormat uncompressed; |
| }; |
| |
| // DomainFormat |
| // |
| // Domain-specific format details (audio or video, compressed or uncompressed). |
| // Used as the type of CodecFormatDetails.domain. |
| union DomainFormat { |
| // Audio format details. |
| AudioFormat audio; |
| // Video format details. |
| VideoFormat video; |
| }; |
| |
| // kMaxCodecOobBytesSize |
| // |
| // Maximum byte count for CodecFormatDetails.codec_oob_bytes. A server can |
| // close the channel if it receives more bytes than this in that field. |
| const uint64 kMaxCodecOobBytesSize = 8192; |
| |
| // CodecFormatDetails |
| // |
| // This describes/details the format on input or output of a codec (separate |
| // instances for input vs. output). |
| struct CodecFormatDetails { |
| // Particular instances of CodecFormatDetails will set this field to make it |
| // easier for a receiver to determine if any part of the format has changed |
| // vs. the last CodecFormatDetails received for the same context. |
| uint64 format_details_version_ordinal; |
| |
| // "mime_type" strings used by particular decoders / encoders so far: |
| // |
| // SW AAC decoder: |
| // * input: |
| // * "audio/aac-adts" - ADTS AAC; self-contained format, but |
| // implementation for now requires codec_oob_bytes to contain |
| // AudioSpecificConfig() reconstructed from ADTS header data - see also |
| // make_AudioSpecificConfig_from_ADTS_header() for now. |
| // * output: |
| // * "audio/raw" - stereo linear 16 bit integer PCM |
| // |
| // TODO(dustingreen): avoid requiring codec_oob_bytes when using SoftAAC2.cpp |
| // for AAC ADTS. |
| // |
| // TODO(dustingreen): Add non-ADTS AAC support (which naturally needs |
| // codec_oob_bytes). |
| // |
| // TODO(dustingreen): Consider "pseudo_mime_type", or an enum, + "domain" |
| // details as needed instead, since calling this "mime_type" could lead to |
| // confusion. |
| string mime_type; |
| |
| // Some codecs have their own binary codec configuration structure. For those |
| // codecs we allow that binary structure to be directly conveyed to the codec |
| // here. |
| // |
| // audio/aac - this is an AudioSpecificConfig(). |
| // audio/aac-adts - this is not set. |
| // TODO(dustingreen): make the audio/aac-adts statement true soon. At the |
| // moment we set this with make_AudioSpecificConfig_from_ADTS_header(), but |
| // that should not be the client's job for ADTS. |
| // |
| // For some formats whose "ES" data format is self-contained, or for which |
| // there is no format-defined binary OOB config, this is null. |
| // |
| // A server can close the channel if the count of bytes is > |
| // kMaxCodecOobBytesSize or is larger than makes any sense for the codec. If |
| // any codec actually needs more than kMaxCodecOobBytesSize bytes here, we |
| // could potentially increase this restriction some, but this interface isn't |
| // designed to support codec OOB config blobs that approach |
| // ZX_CHANNEL_MAX_MSG_BYTES. |
| vector<uint8>? codec_oob_bytes; |
| |
| // Decoder input format: |
| // |
| // If a format is not self-describing given the mime_type and a |
| // format-spec-defined codec_oob_bytes, this domain field can be set to |
| // provide the additional compressed-format-specific details. This is |
| // expected to be fairly rare, so most compressed input formats will have |
| // only the mime_type and possibly codec_oob_bytes set, with domain typically |
| // null. If an encoder is upstream however, domain may be set to convey the |
| // encoder settings that were used, but a decoder consumer doesn't need to |
| // look at those. |
| // |
| // Encoder output format: |
| // |
| // The encoder's compressed data output typically needs some configuration |
| // (provided in this field) that's convenient to provide in a form that's not |
| // codec_oob_bytes, and the codec can convert that config to codec_oob_bytes |
| // on encoder output via OnOutputConfig(). We retain these encoder settings |
| // in the output CodecFormatDetails to allow for cases where a downstream |
| // consumer knowing the encoder settings could be useful. |
| // |
| // TODO(dustingreen): Decide if we want to retain this, or if we'd prefer to |
| // split out config settings and maybe only represent a few encoder settings |
| // as best-effort optional aux data, like bitrate. |
| // |
| // Encoder input format / decoder output format: |
| // |
| // This field contains fairly detailed information re. uncompressed data |
| // format details, which tends to _not_ be self-describing in-band. |
| DomainFormat? domain; |
| |
| // See the comments on the Parameter struct. At the time we lock relevant |
| // FIDL interfaces, there should be zero use of this field outside tests, but |
| // this is here in case we need to allow a codec client to convey additional |
| // config parameters to/from a codec which we didn't anticipate before |
| // locking. |
| // |
| // If there are any known "official" exceptions to the previous paragraph, |
| // we'll list them here by corresponding mime_type (none so far): |
| // * "<mime_type>" - <usage_description> |
| // |
| // For codecs that define their own codec-specific config/OOB data, put that |
| // in codec_oob_bytes above instead of this field. |
| vector<Parameter>? pass_through_parameters; |
| }; |