// This is a generated file (see the discoveryapis_generator project).
// ignore_for_file: camel_case_types
// ignore_for_file: comment_references
// ignore_for_file: file_names
// ignore_for_file: library_names
// ignore_for_file: lines_longer_than_80_chars
// ignore_for_file: non_constant_identifier_names
// ignore_for_file: prefer_expression_function_bodies
// ignore_for_file: prefer_final_locals
// ignore_for_file: prefer_interpolation_to_compose_strings
// ignore_for_file: unnecessary_brace_in_string_interps
// ignore_for_file: unnecessary_cast
// ignore_for_file: unnecessary_lambdas
// ignore_for_file: unnecessary_parenthesis
// ignore_for_file: unnecessary_string_interpolations
/// Cloud Speech-to-Text API - v1
///
/// Converts audio to text by applying powerful neural network models.
///
/// For more information, see
/// <https://cloud.google.com/speech-to-text/docs/quickstart-protocol>
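///
/// A minimal construction sketch (illustrative only; `client` is assumed to
/// be an authenticated [http.Client], for example one obtained via
/// `package:googleapis_auth`, which is outside the scope of this file):
///
/// ```dart
/// final speechApi = SpeechApi(client);
/// // speechApi.speech, speechApi.operations and speechApi.projects expose
/// // the individual resource APIs declared below.
/// ```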
library googleapis.speech.v1;
import 'dart:async' as async;
import 'dart:convert' as convert;
import 'dart:core' as core;
import 'package:_discoveryapis_commons/_discoveryapis_commons.dart' as commons;
import 'package:http/http.dart' as http;
import '../src/user_agent.dart';
export 'package:_discoveryapis_commons/_discoveryapis_commons.dart'
show ApiRequestError, DetailedApiRequestError;
/// Converts audio to text by applying powerful neural network models.
class SpeechApi {
/// View and manage your data across Google Cloud Platform services
static const cloudPlatformScope =
'https://www.googleapis.com/auth/cloud-platform';
final commons.ApiRequester _requester;
OperationsResourceApi get operations => OperationsResourceApi(_requester);
ProjectsResourceApi get projects => ProjectsResourceApi(_requester);
SpeechResourceApi get speech => SpeechResourceApi(_requester);
SpeechApi(http.Client client,
{core.String rootUrl = 'https://speech.googleapis.com/',
core.String servicePath = ''})
: _requester =
commons.ApiRequester(client, rootUrl, servicePath, userAgent);
}
class OperationsResourceApi {
final commons.ApiRequester _requester;
OperationsResourceApi(commons.ApiRequester client) : _requester = client;
/// Gets the latest state of a long-running operation. Clients can use this
/// method to poll the operation result at intervals as recommended by the API
/// service.
///
/// Request parameters:
///
/// [name] - The name of the operation resource.
/// Value must have pattern "^.*$".
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
  /// Completes with an [Operation].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
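  ///
  /// Illustrative polling sketch (assumes `api` is a constructed [SpeechApi]
  /// and `operationName` came from a prior `longrunningrecognize` call; the
  /// five-second interval is an arbitrary example):
  ///
  /// ```dart
  /// var operation = await api.operations.get(operationName);
  /// while (operation.done != true) {
  ///   await Future.delayed(const Duration(seconds: 5));
  ///   operation = await api.operations.get(operationName);
  /// }
  /// ```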
async.Future<Operation> get(
core.String name, {
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (name == null) {
throw core.ArgumentError('Parameter name is required.');
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url = 'v1/operations/' + commons.Escaper.ecapeVariableReserved('$name');
final _response = _requester.request(
_url,
'GET',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => Operation.fromJson(data as core.Map<core.String, core.dynamic>),
);
}
/// Lists operations that match the specified filter in the request. If the
/// server doesn't support this method, it returns `UNIMPLEMENTED`. NOTE: the
/// `name` binding allows API services to override the binding to use
/// different resource name schemes, such as `users / * /operations`. To
/// override the binding, API services can add a binding such as
/// `"/v1/{name=users / * }/operations"` to their service configuration. For
/// backwards compatibility, the default name includes the operations
/// collection id, however overriding users must ensure the name binding is
/// the parent resource, without the operations collection id.
///
/// Request parameters:
///
/// [filter] - The standard list filter.
///
/// [name] - The name of the operation's parent resource.
///
/// [pageSize] - The standard list page size.
///
/// [pageToken] - The standard list page token.
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
/// Completes with a [ListOperationsResponse].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
async.Future<ListOperationsResponse> list({
core.String filter,
core.String name,
core.int pageSize,
core.String pageToken,
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (filter != null) {
_queryParams['filter'] = [filter];
}
if (name != null) {
_queryParams['name'] = [name];
}
if (pageSize != null) {
_queryParams['pageSize'] = ['${pageSize}'];
}
if (pageToken != null) {
_queryParams['pageToken'] = [pageToken];
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url = 'v1/operations';
final _response = _requester.request(
_url,
'GET',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => ListOperationsResponse.fromJson(
data as core.Map<core.String, core.dynamic>),
);
}
}
class ProjectsResourceApi {
final commons.ApiRequester _requester;
ProjectsLocationsResourceApi get locations =>
ProjectsLocationsResourceApi(_requester);
ProjectsResourceApi(commons.ApiRequester client) : _requester = client;
}
class ProjectsLocationsResourceApi {
final commons.ApiRequester _requester;
ProjectsLocationsOperationsResourceApi get operations =>
ProjectsLocationsOperationsResourceApi(_requester);
ProjectsLocationsResourceApi(commons.ApiRequester client)
: _requester = client;
}
class ProjectsLocationsOperationsResourceApi {
final commons.ApiRequester _requester;
ProjectsLocationsOperationsResourceApi(commons.ApiRequester client)
: _requester = client;
/// Gets the latest state of a long-running operation. Clients can use this
/// method to poll the operation result at intervals as recommended by the API
/// service.
///
/// Request parameters:
///
/// [name] - The name of the operation resource.
/// Value must have pattern
/// "^projects/[^/]+/locations/[^/]+/operations/[^/]+$".
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
  /// Completes with an [Operation].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
async.Future<Operation> get(
core.String name, {
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (name == null) {
throw core.ArgumentError('Parameter name is required.');
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url = 'v1/' + commons.Escaper.ecapeVariableReserved('$name');
final _response = _requester.request(
_url,
'GET',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => Operation.fromJson(data as core.Map<core.String, core.dynamic>),
);
}
/// Lists operations that match the specified filter in the request. If the
/// server doesn't support this method, it returns `UNIMPLEMENTED`. NOTE: the
/// `name` binding allows API services to override the binding to use
/// different resource name schemes, such as `users / * /operations`. To
/// override the binding, API services can add a binding such as
/// `"/v1/{name=users / * }/operations"` to their service configuration. For
/// backwards compatibility, the default name includes the operations
/// collection id, however overriding users must ensure the name binding is
/// the parent resource, without the operations collection id.
///
/// Request parameters:
///
/// [name] - The name of the operation's parent resource.
/// Value must have pattern "^projects/[^/]+/locations/[^/]+$".
///
/// [filter] - The standard list filter.
///
/// [pageSize] - The standard list page size.
///
/// [pageToken] - The standard list page token.
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
/// Completes with a [ListOperationsResponse].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
async.Future<ListOperationsResponse> list(
core.String name, {
core.String filter,
core.int pageSize,
core.String pageToken,
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (name == null) {
throw core.ArgumentError('Parameter name is required.');
}
if (filter != null) {
_queryParams['filter'] = [filter];
}
if (pageSize != null) {
_queryParams['pageSize'] = ['${pageSize}'];
}
if (pageToken != null) {
_queryParams['pageToken'] = [pageToken];
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url =
'v1/' + commons.Escaper.ecapeVariableReserved('$name') + '/operations';
final _response = _requester.request(
_url,
'GET',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => ListOperationsResponse.fromJson(
data as core.Map<core.String, core.dynamic>),
);
}
}
class SpeechResourceApi {
final commons.ApiRequester _requester;
SpeechResourceApi(commons.ApiRequester client) : _requester = client;
/// Performs asynchronous speech recognition: receive results via the
/// google.longrunning.Operations interface. Returns either an
/// `Operation.error` or an `Operation.response` which contains a
/// `LongRunningRecognizeResponse` message. For more information on
/// asynchronous speech recognition, see the
/// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
///
/// [request] - The metadata request object.
///
/// Request parameters:
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
  /// Completes with an [Operation].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
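  ///
  /// A usage sketch (assumes `api` is a constructed [SpeechApi]; the bucket
  /// URI and audio parameters are illustrative):
  ///
  /// ```dart
  /// final operation = await api.speech.longrunningrecognize(
  ///   LongRunningRecognizeRequest()
  ///     ..config = (RecognitionConfig()
  ///       ..encoding = 'LINEAR16'
  ///       ..sampleRateHertz = 16000
  ///       ..languageCode = 'en-US')
  ///     ..audio = (RecognitionAudio()..uri = 'gs://my-bucket/audio.raw'),
  /// );
  /// // Poll `operations.get(operation.name)` until `done` is true, then
  /// // decode `operation.response` with [LongRunningRecognizeResponse.fromJson].
  /// ```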
async.Future<Operation> longrunningrecognize(
LongRunningRecognizeRequest request, {
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (request != null) {
_body = convert.json.encode(request.toJson());
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url = 'v1/speech:longrunningrecognize';
final _response = _requester.request(
_url,
'POST',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => Operation.fromJson(data as core.Map<core.String, core.dynamic>),
);
}
/// Performs synchronous speech recognition: receive results after all audio
/// has been sent and processed.
///
/// [request] - The metadata request object.
///
/// Request parameters:
///
/// [$fields] - Selector specifying which fields to include in a partial
/// response.
///
/// Completes with a [RecognizeResponse].
///
/// Completes with a [commons.ApiRequestError] if the API endpoint returned an
/// error.
///
/// If the used [http.Client] completes with an error when making a REST call,
/// this method will complete with the same error.
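  ///
  /// A usage sketch (assumes `api` is a constructed [SpeechApi]; the bucket
  /// URI and audio parameters are illustrative):
  ///
  /// ```dart
  /// final response = await api.speech.recognize(RecognizeRequest()
  ///   ..config = (RecognitionConfig()
  ///     ..encoding = 'FLAC'
  ///     ..languageCode = 'en-US')
  ///   ..audio = (RecognitionAudio()..uri = 'gs://my-bucket/audio.flac'));
  /// // response.results holds the transcription results; see
  /// // [RecognizeResponse] for an example of reading them.
  /// ```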
async.Future<RecognizeResponse> recognize(
RecognizeRequest request, {
core.String $fields,
}) {
core.String _url;
final _queryParams = <core.String, core.List<core.String>>{};
commons.Media _uploadMedia;
commons.UploadOptions _uploadOptions;
var _downloadOptions = commons.DownloadOptions.Metadata;
core.String _body;
if (request != null) {
_body = convert.json.encode(request.toJson());
}
if ($fields != null) {
_queryParams['fields'] = [$fields];
}
_url = 'v1/speech:recognize';
final _response = _requester.request(
_url,
'POST',
body: _body,
queryParams: _queryParams,
uploadOptions: _uploadOptions,
uploadMedia: _uploadMedia,
downloadOptions: _downloadOptions,
);
return _response.then(
(data) => RecognizeResponse.fromJson(
data as core.Map<core.String, core.dynamic>),
);
}
}
/// The response message for Operations.ListOperations.
class ListOperationsResponse {
/// The standard List next-page token.
core.String nextPageToken;
/// A list of operations that matches the specified filter in the request.
core.List<Operation> operations;
ListOperationsResponse();
ListOperationsResponse.fromJson(core.Map _json) {
if (_json.containsKey('nextPageToken')) {
nextPageToken = _json['nextPageToken'] as core.String;
}
if (_json.containsKey('operations')) {
operations = (_json['operations'] as core.List)
.map<Operation>((value) =>
Operation.fromJson(value as core.Map<core.String, core.dynamic>))
.toList();
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (nextPageToken != null) {
_json['nextPageToken'] = nextPageToken;
}
if (operations != null) {
_json['operations'] = operations.map((value) => value.toJson()).toList();
}
return _json;
}
}
/// Describes the progress of a long-running `LongRunningRecognize` call. It is
/// included in the `metadata` field of the `Operation` returned by the
/// `GetOperation` call of the `google::longrunning::Operations` service.
class LongRunningRecognizeMetadata {
/// Time of the most recent processing update.
core.String lastUpdateTime;
/// Approximate percentage of audio processed thus far. Guaranteed to be 100
/// when the audio is fully processed and the results are available.
core.int progressPercent;
/// Time when the request was received.
core.String startTime;
/// Output only. The URI of the audio file being transcribed. Empty if the
/// audio was sent as byte content.
core.String uri;
LongRunningRecognizeMetadata();
LongRunningRecognizeMetadata.fromJson(core.Map _json) {
if (_json.containsKey('lastUpdateTime')) {
lastUpdateTime = _json['lastUpdateTime'] as core.String;
}
if (_json.containsKey('progressPercent')) {
progressPercent = _json['progressPercent'] as core.int;
}
if (_json.containsKey('startTime')) {
startTime = _json['startTime'] as core.String;
}
if (_json.containsKey('uri')) {
uri = _json['uri'] as core.String;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (lastUpdateTime != null) {
_json['lastUpdateTime'] = lastUpdateTime;
}
if (progressPercent != null) {
_json['progressPercent'] = progressPercent;
}
if (startTime != null) {
_json['startTime'] = startTime;
}
if (uri != null) {
_json['uri'] = uri;
}
return _json;
}
}
/// The top-level message sent by the client for the `LongRunningRecognize`
/// method.
class LongRunningRecognizeRequest {
/// Required. The audio data to be recognized.
RecognitionAudio audio;
/// Required. Provides information to the recognizer that specifies how to
/// process the request.
RecognitionConfig config;
LongRunningRecognizeRequest();
LongRunningRecognizeRequest.fromJson(core.Map _json) {
if (_json.containsKey('audio')) {
audio = RecognitionAudio.fromJson(
_json['audio'] as core.Map<core.String, core.dynamic>);
}
if (_json.containsKey('config')) {
config = RecognitionConfig.fromJson(
_json['config'] as core.Map<core.String, core.dynamic>);
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (audio != null) {
_json['audio'] = audio.toJson();
}
if (config != null) {
_json['config'] = config.toJson();
}
return _json;
}
}
/// The only message returned to the client by the `LongRunningRecognize`
/// method. It contains the result as zero or more sequential
/// `SpeechRecognitionResult` messages. It is included in the `result.response`
/// field of the `Operation` returned by the `GetOperation` call of the
/// `google::longrunning::Operations` service.
class LongRunningRecognizeResponse {
/// Sequential list of transcription results corresponding to sequential
/// portions of audio.
core.List<SpeechRecognitionResult> results;
LongRunningRecognizeResponse();
LongRunningRecognizeResponse.fromJson(core.Map _json) {
if (_json.containsKey('results')) {
results = (_json['results'] as core.List)
.map<SpeechRecognitionResult>((value) =>
SpeechRecognitionResult.fromJson(
value as core.Map<core.String, core.dynamic>))
.toList();
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (results != null) {
_json['results'] = results.map((value) => value.toJson()).toList();
}
return _json;
}
}
/// This resource represents a long-running operation that is the result of a
/// network API call.
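///
/// Illustrative handling sketch (assumes `operation` was returned by
/// [SpeechResourceApi.longrunningrecognize] or [OperationsResourceApi.get]):
///
/// ```dart
/// if (operation.done == true) {
///   if (operation.error != null) {
///     print('Recognition failed: ${operation.error.message}');
///   } else if (operation.response != null) {
///     final result = LongRunningRecognizeResponse.fromJson(operation.response);
///     // result.results holds the transcription results.
///   }
/// }
/// ```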
class Operation {
/// If the value is `false`, it means the operation is still in progress. If
/// `true`, the operation is completed, and either `error` or `response` is
/// available.
core.bool done;
/// The error result of the operation in case of failure or cancellation.
Status error;
/// Service-specific metadata associated with the operation. It typically
/// contains progress information and common metadata such as create time.
/// Some services might not provide such metadata. Any method that returns a
/// long-running operation should document the metadata type, if any.
///
/// The values for Object must be JSON objects. It can consist of `num`,
/// `String`, `bool` and `null` as well as `Map` and `List` values.
core.Map<core.String, core.Object> metadata;
/// The server-assigned name, which is only unique within the same service
/// that originally returns it. If you use the default HTTP mapping, the
/// `name` should be a resource name ending with `operations/{unique_id}`.
core.String name;
/// The normal response of the operation in case of success. If the original
/// method returns no data on success, such as `Delete`, the response is
/// `google.protobuf.Empty`. If the original method is standard
/// `Get`/`Create`/`Update`, the response should be the resource. For other
/// methods, the response should have the type `XxxResponse`, where `Xxx` is
/// the original method name. For example, if the original method name is
/// `TakeSnapshot()`, the inferred response type is `TakeSnapshotResponse`.
///
/// The values for Object must be JSON objects. It can consist of `num`,
/// `String`, `bool` and `null` as well as `Map` and `List` values.
core.Map<core.String, core.Object> response;
Operation();
Operation.fromJson(core.Map _json) {
if (_json.containsKey('done')) {
done = _json['done'] as core.bool;
}
if (_json.containsKey('error')) {
error = Status.fromJson(
_json['error'] as core.Map<core.String, core.dynamic>);
}
if (_json.containsKey('metadata')) {
metadata =
(_json['metadata'] as core.Map).cast<core.String, core.Object>().map(
(key, item) => core.MapEntry(
key,
item as core.Object,
),
);
}
if (_json.containsKey('name')) {
name = _json['name'] as core.String;
}
if (_json.containsKey('response')) {
response =
(_json['response'] as core.Map).cast<core.String, core.Object>().map(
(key, item) => core.MapEntry(
key,
item as core.Object,
),
);
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (done != null) {
_json['done'] = done;
}
if (error != null) {
_json['error'] = error.toJson();
}
if (metadata != null) {
_json['metadata'] = metadata;
}
if (name != null) {
_json['name'] = name;
}
if (response != null) {
_json['response'] = response;
}
return _json;
}
}
/// Contains audio data in the encoding specified in the `RecognitionConfig`.
/// Either `content` or `uri` must be supplied. Supplying both or neither
/// returns google.rpc.Code.INVALID_ARGUMENT. See [content
/// limits](https://cloud.google.com/speech-to-text/quotas#content).
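///
/// Illustrative construction sketch (exactly one of the two fields should be
/// set; the file path and bucket URI are assumptions, and reading a local
/// file requires `dart:io` on the caller's side):
///
/// ```dart
/// // Inline audio bytes:
/// final inlineAudio = RecognitionAudio()
///   ..contentAsBytes = File('audio.raw').readAsBytesSync();
/// // Or audio already stored in Cloud Storage:
/// final storedAudio = RecognitionAudio()..uri = 'gs://my-bucket/audio.raw';
/// ```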
class RecognitionAudio {
/// The audio data bytes encoded as specified in `RecognitionConfig`. Note: as
/// with all bytes fields, proto buffers use a pure binary representation,
/// whereas JSON representations use base64.
core.String content;
core.List<core.int> get contentAsBytes => convert.base64.decode(content);
set contentAsBytes(core.List<core.int> _bytes) {
content =
convert.base64.encode(_bytes).replaceAll('/', '_').replaceAll('+', '-');
}
/// URI that points to a file that contains audio data bytes as specified in
/// `RecognitionConfig`. The file must not be compressed (for example, gzip).
/// Currently, only Google Cloud Storage URIs are supported, which must be
/// specified in the following format: `gs://bucket_name/object_name` (other
/// URI formats return google.rpc.Code.INVALID_ARGUMENT). For more
/// information, see [Request
/// URIs](https://cloud.google.com/storage/docs/reference-uris).
core.String uri;
RecognitionAudio();
RecognitionAudio.fromJson(core.Map _json) {
if (_json.containsKey('content')) {
content = _json['content'] as core.String;
}
if (_json.containsKey('uri')) {
uri = _json['uri'] as core.String;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (content != null) {
_json['content'] = content;
}
if (uri != null) {
_json['uri'] = uri;
}
return _json;
}
}
/// Provides information to the recognizer that specifies how to process the
/// request.
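///
/// A typical configuration sketch (values are illustrative; only
/// `languageCode` is always required, and `encoding`/`sampleRateHertz` are
/// optional for FLAC and WAV files):
///
/// ```dart
/// final config = RecognitionConfig()
///   ..encoding = 'LINEAR16'
///   ..sampleRateHertz = 16000
///   ..languageCode = 'en-US'
///   ..enableAutomaticPunctuation = true;
/// ```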
class RecognitionConfig {
/// The number of channels in the input audio data. ONLY set this for
/// MULTI-CHANNEL recognition. Valid values for LINEAR16 and FLAC are `1`-`8`.
/// Valid values for OGG_OPUS are '1'-'254'. Valid value for MULAW, AMR,
/// AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`. If `0` or omitted, defaults
/// to one channel (mono). Note: We only recognize the first channel by
/// default. To perform independent recognition on each channel set
/// `enable_separate_recognition_per_channel` to 'true'.
core.int audioChannelCount;
/// Config to enable speaker diarization and set additional parameters to make
/// diarization better suited for your application. Note: When this is
/// enabled, we send all the words from the beginning of the audio for the top
  /// alternative in every consecutive STREAMING response. This is done in
/// order to improve our speaker tags as our models learn to identify the
/// speakers in the conversation over time. For non-streaming requests, the
/// diarization results will be provided only in the top alternative of the
/// FINAL SpeechRecognitionResult.
SpeakerDiarizationConfig diarizationConfig;
/// If 'true', adds punctuation to recognition result hypotheses. This feature
/// is only available in select languages. Setting this for requests in other
/// languages has no effect at all. The default 'false' value does not add
/// punctuation to result hypotheses.
core.bool enableAutomaticPunctuation;
/// This needs to be set to `true` explicitly and `audio_channel_count` > 1 to
/// get each channel recognized separately. The recognition result will
/// contain a `channel_tag` field to state which channel that result belongs
/// to. If this is not true, we will only recognize the first channel. The
/// request is billed cumulatively for all channels recognized:
/// `audio_channel_count` multiplied by the length of the audio.
core.bool enableSeparateRecognitionPerChannel;
/// If `true`, the top result includes a list of words and the start and end
/// time offsets (timestamps) for those words. If `false`, no word-level time
/// offset information is returned. The default is `false`.
core.bool enableWordTimeOffsets;
/// Encoding of audio data sent in all `RecognitionAudio` messages. This field
/// is optional for `FLAC` and `WAV` audio files and required for all other
/// audio formats. For details, see AudioEncoding.
/// Possible string values are:
/// - "ENCODING_UNSPECIFIED" : Not specified.
/// - "LINEAR16" : Uncompressed 16-bit signed little-endian samples (Linear
/// PCM).
/// - "FLAC" : `FLAC` (Free Lossless Audio Codec) is the recommended encoding
/// because it is lossless--therefore recognition is not compromised--and
/// requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
/// encoding supports 16-bit and 24-bit samples, however, not all fields in
/// `STREAMINFO` are supported.
/// - "MULAW" : 8-bit samples that compand 14-bit audio samples using G.711
/// PCMU/mu-law.
/// - "AMR" : Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must
/// be 8000.
/// - "AMR_WB" : Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must
/// be 16000.
/// - "OGG_OPUS" : Opus encoded audio frames in Ogg container
/// ([OggOpus](https://wiki.xiph.org/OggOpus)). `sample_rate_hertz` must be
/// one of 8000, 12000, 16000, 24000, or 48000.
/// - "SPEEX_WITH_HEADER_BYTE" : Although the use of lossy encodings is not
/// recommended, if a very low bitrate encoding is required, `OGG_OPUS` is
/// highly preferred over Speex encoding. The [Speex](https://speex.org/)
/// encoding supported by Cloud Speech API has a header byte in each block, as
/// in MIME type `audio/x-speex-with-header-byte`. It is a variant of the RTP
/// Speex encoding defined in [RFC 5574](https://tools.ietf.org/html/rfc5574).
/// The stream is a sequence of blocks, one block per RTP packet. Each block
/// starts with a byte containing the length of the block, in bytes, followed
/// by one or more frames of Speex data, padded to an integral number of bytes
/// (octets) as specified in RFC 5574. In other words, each RTP header is
/// replaced with a single byte containing the block length. Only Speex
/// wideband is supported. `sample_rate_hertz` must be 16000.
core.String encoding;
/// Required. The language of the supplied audio as a
/// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
/// Example: "en-US". See [Language
/// Support](https://cloud.google.com/speech-to-text/docs/languages) for a
/// list of the currently supported language codes.
core.String languageCode;
/// Maximum number of recognition hypotheses to be returned. Specifically, the
/// maximum number of `SpeechRecognitionAlternative` messages within each
/// `SpeechRecognitionResult`. The server may return fewer than
/// `max_alternatives`. Valid values are `0`-`30`. A value of `0` or `1` will
  /// return a maximum of one. If omitted, a maximum of one will be returned.
core.int maxAlternatives;
/// Metadata regarding this request.
RecognitionMetadata metadata;
  /// Which model to select for the given request. Select the model best suited
  /// to your domain to get best results. If a model is not explicitly
  /// specified, then we auto-select a model based on the parameters in the
  /// RecognitionConfig.
  ///
  /// - command_and_search: Best for short queries such as voice commands or
  ///   voice search.
  /// - phone_call: Best for audio that originated from a phone call (typically
  ///   recorded at an 8khz sampling rate).
  /// - video: Best for audio that originated from video or includes multiple
  ///   speakers. Ideally the audio is recorded at a 16khz or greater sampling
  ///   rate. This is a premium model that costs more than the standard rate.
  /// - default: Best for audio that is not one of the specific audio models.
  ///   For example, long-form audio. Ideally the audio is high-fidelity,
  ///   recorded at a 16khz or greater sampling rate.
core.String model;
/// If set to `true`, the server will attempt to filter out profanities,
/// replacing all but the initial character in each filtered word with
/// asterisks, e.g. "f***". If set to `false` or omitted, profanities won't be
/// filtered out.
core.bool profanityFilter;
/// Sample rate in Hertz of the audio data sent in all `RecognitionAudio`
/// messages. Valid values are: 8000-48000. 16000 is optimal. For best
/// results, set the sampling rate of the audio source to 16000 Hz. If that's
/// not possible, use the native sample rate of the audio source (instead of
/// re-sampling). This field is optional for FLAC and WAV audio files, but is
/// required for all other audio formats. For details, see AudioEncoding.
core.int sampleRateHertz;
/// Array of SpeechContext. A means to provide context to assist the speech
/// recognition. For more information, see [speech
/// adaptation](https://cloud.google.com/speech-to-text/docs/context-strength).
core.List<SpeechContext> speechContexts;
/// Set to true to use an enhanced model for speech recognition. If
/// `use_enhanced` is set to true and the `model` field is not set, then an
/// appropriate enhanced model is chosen if an enhanced model exists for the
/// audio. If `use_enhanced` is true and an enhanced version of the specified
/// model does not exist, then the speech is recognized using the standard
/// version of the specified model.
core.bool useEnhanced;
RecognitionConfig();
RecognitionConfig.fromJson(core.Map _json) {
if (_json.containsKey('audioChannelCount')) {
audioChannelCount = _json['audioChannelCount'] as core.int;
}
if (_json.containsKey('diarizationConfig')) {
diarizationConfig = SpeakerDiarizationConfig.fromJson(
_json['diarizationConfig'] as core.Map<core.String, core.dynamic>);
}
if (_json.containsKey('enableAutomaticPunctuation')) {
enableAutomaticPunctuation =
_json['enableAutomaticPunctuation'] as core.bool;
}
if (_json.containsKey('enableSeparateRecognitionPerChannel')) {
enableSeparateRecognitionPerChannel =
_json['enableSeparateRecognitionPerChannel'] as core.bool;
}
if (_json.containsKey('enableWordTimeOffsets')) {
enableWordTimeOffsets = _json['enableWordTimeOffsets'] as core.bool;
}
if (_json.containsKey('encoding')) {
encoding = _json['encoding'] as core.String;
}
if (_json.containsKey('languageCode')) {
languageCode = _json['languageCode'] as core.String;
}
if (_json.containsKey('maxAlternatives')) {
maxAlternatives = _json['maxAlternatives'] as core.int;
}
if (_json.containsKey('metadata')) {
metadata = RecognitionMetadata.fromJson(
_json['metadata'] as core.Map<core.String, core.dynamic>);
}
if (_json.containsKey('model')) {
model = _json['model'] as core.String;
}
if (_json.containsKey('profanityFilter')) {
profanityFilter = _json['profanityFilter'] as core.bool;
}
if (_json.containsKey('sampleRateHertz')) {
sampleRateHertz = _json['sampleRateHertz'] as core.int;
}
if (_json.containsKey('speechContexts')) {
speechContexts = (_json['speechContexts'] as core.List)
.map<SpeechContext>((value) => SpeechContext.fromJson(
value as core.Map<core.String, core.dynamic>))
.toList();
}
if (_json.containsKey('useEnhanced')) {
useEnhanced = _json['useEnhanced'] as core.bool;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (audioChannelCount != null) {
_json['audioChannelCount'] = audioChannelCount;
}
if (diarizationConfig != null) {
_json['diarizationConfig'] = diarizationConfig.toJson();
}
if (enableAutomaticPunctuation != null) {
_json['enableAutomaticPunctuation'] = enableAutomaticPunctuation;
}
if (enableSeparateRecognitionPerChannel != null) {
_json['enableSeparateRecognitionPerChannel'] =
enableSeparateRecognitionPerChannel;
}
if (enableWordTimeOffsets != null) {
_json['enableWordTimeOffsets'] = enableWordTimeOffsets;
}
if (encoding != null) {
_json['encoding'] = encoding;
}
if (languageCode != null) {
_json['languageCode'] = languageCode;
}
if (maxAlternatives != null) {
_json['maxAlternatives'] = maxAlternatives;
}
if (metadata != null) {
_json['metadata'] = metadata.toJson();
}
if (model != null) {
_json['model'] = model;
}
if (profanityFilter != null) {
_json['profanityFilter'] = profanityFilter;
}
if (sampleRateHertz != null) {
_json['sampleRateHertz'] = sampleRateHertz;
}
if (speechContexts != null) {
_json['speechContexts'] =
speechContexts.map((value) => value.toJson()).toList();
}
if (useEnhanced != null) {
_json['useEnhanced'] = useEnhanced;
}
return _json;
}
}
/// Description of audio data to be recognized.
class RecognitionMetadata {
  /// Description of the content. E.g. "Recordings of federal supreme court
  /// hearings from 2012".
core.String audioTopic;
/// The industry vertical to which this speech recognition request most
/// closely applies. This is most indicative of the topics contained in the
/// audio. Use the 6-digit NAICS code to identify the industry vertical - see
/// https://www.naics.com/search/.
core.int industryNaicsCodeOfAudio;
/// The use case most closely describing the audio content to be recognized.
/// Possible string values are:
/// - "INTERACTION_TYPE_UNSPECIFIED" : Use case is either unknown or is
/// something other than one of the other values below.
/// - "DISCUSSION" : Multiple people in a conversation or discussion. For
/// example in a meeting with two or more people actively participating.
/// Typically all the primary people speaking would be in the same room (if
/// not, see PHONE_CALL)
/// - "PRESENTATION" : One or more persons lecturing or presenting to others,
/// mostly uninterrupted.
/// - "PHONE_CALL" : A phone-call or video-conference in which two or more
/// people, who are not in the same room, are actively participating.
/// - "VOICEMAIL" : A recorded message intended for another person to listen
/// to.
/// - "PROFESSIONALLY_PRODUCED" : Professionally produced audio (eg. TV Show,
/// Podcast).
/// - "VOICE_SEARCH" : Transcribe spoken questions and queries into text.
/// - "VOICE_COMMAND" : Transcribe voice commands, such as for controlling a
/// device.
/// - "DICTATION" : Transcribe speech to text to create a written document,
/// such as a text-message, email or report.
core.String interactionType;
/// The audio type that most closely describes the audio being recognized.
/// Possible string values are:
/// - "MICROPHONE_DISTANCE_UNSPECIFIED" : Audio type is not known.
/// - "NEARFIELD" : The audio was captured from a closely placed microphone.
  /// E.g. phone, dictaphone, or handheld microphone. Generally the speaker
  /// is within 1 meter of the microphone.
  /// - "MIDFIELD" : The speaker is within 3 meters of the microphone.
/// - "FARFIELD" : The speaker is more than 3 meters away from the microphone.
core.String microphoneDistance;
/// The original media the speech was recorded on.
/// Possible string values are:
/// - "ORIGINAL_MEDIA_TYPE_UNSPECIFIED" : Unknown original media type.
/// - "AUDIO" : The speech data is an audio recording.
/// - "VIDEO" : The speech data originally recorded on a video.
core.String originalMediaType;
  /// MIME type of the original audio file. For example `audio/m4a`,
  /// `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`. A list of possible audio
  /// MIME types is maintained at
  /// http://www.iana.org/assignments/media-types/media-types.xhtml#audio
core.String originalMimeType;
  /// The device used to make the recording. Examples: 'Nexus 5X', 'Polycom
  /// SoundStation IP 6000', 'POTS', 'VoIP', or 'Cardioid Microphone'.
core.String recordingDeviceName;
/// The type of device the speech was recorded with.
/// Possible string values are:
/// - "RECORDING_DEVICE_TYPE_UNSPECIFIED" : The recording device is unknown.
/// - "SMARTPHONE" : Speech was recorded on a smartphone.
/// - "PC" : Speech was recorded using a personal computer or tablet.
/// - "PHONE_LINE" : Speech was recorded over a phone line.
/// - "VEHICLE" : Speech was recorded in a vehicle.
/// - "OTHER_OUTDOOR_DEVICE" : Speech was recorded outdoors.
/// - "OTHER_INDOOR_DEVICE" : Speech was recorded indoors.
core.String recordingDeviceType;
RecognitionMetadata();
RecognitionMetadata.fromJson(core.Map _json) {
if (_json.containsKey('audioTopic')) {
audioTopic = _json['audioTopic'] as core.String;
}
if (_json.containsKey('industryNaicsCodeOfAudio')) {
industryNaicsCodeOfAudio = _json['industryNaicsCodeOfAudio'] as core.int;
}
if (_json.containsKey('interactionType')) {
interactionType = _json['interactionType'] as core.String;
}
if (_json.containsKey('microphoneDistance')) {
microphoneDistance = _json['microphoneDistance'] as core.String;
}
if (_json.containsKey('originalMediaType')) {
originalMediaType = _json['originalMediaType'] as core.String;
}
if (_json.containsKey('originalMimeType')) {
originalMimeType = _json['originalMimeType'] as core.String;
}
if (_json.containsKey('recordingDeviceName')) {
recordingDeviceName = _json['recordingDeviceName'] as core.String;
}
if (_json.containsKey('recordingDeviceType')) {
recordingDeviceType = _json['recordingDeviceType'] as core.String;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (audioTopic != null) {
_json['audioTopic'] = audioTopic;
}
if (industryNaicsCodeOfAudio != null) {
_json['industryNaicsCodeOfAudio'] = industryNaicsCodeOfAudio;
}
if (interactionType != null) {
_json['interactionType'] = interactionType;
}
if (microphoneDistance != null) {
_json['microphoneDistance'] = microphoneDistance;
}
if (originalMediaType != null) {
_json['originalMediaType'] = originalMediaType;
}
if (originalMimeType != null) {
_json['originalMimeType'] = originalMimeType;
}
if (recordingDeviceName != null) {
_json['recordingDeviceName'] = recordingDeviceName;
}
if (recordingDeviceType != null) {
_json['recordingDeviceType'] = recordingDeviceType;
}
return _json;
}
}
/// The top-level message sent by the client for the `Recognize` method.
class RecognizeRequest {
/// Required. The audio data to be recognized.
RecognitionAudio audio;
/// Required. Provides information to the recognizer that specifies how to
/// process the request.
RecognitionConfig config;
RecognizeRequest();
RecognizeRequest.fromJson(core.Map _json) {
if (_json.containsKey('audio')) {
audio = RecognitionAudio.fromJson(
_json['audio'] as core.Map<core.String, core.dynamic>);
}
if (_json.containsKey('config')) {
config = RecognitionConfig.fromJson(
_json['config'] as core.Map<core.String, core.dynamic>);
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (audio != null) {
_json['audio'] = audio.toJson();
}
if (config != null) {
_json['config'] = config.toJson();
}
return _json;
}
}
/// The only message returned to the client by the `Recognize` method. It
/// contains the result as zero or more sequential `SpeechRecognitionResult`
/// messages.
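///
/// Illustrative reading sketch (assumes `response` was returned by
/// [SpeechResourceApi.recognize]; `results` may be null or empty when nothing
/// was recognized):
///
/// ```dart
/// for (final result in response.results ?? <SpeechRecognitionResult>[]) {
///   final best = result.alternatives.first;
///   print('${best.transcript} (confidence: ${best.confidence})');
/// }
/// ```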
class RecognizeResponse {
/// Sequential list of transcription results corresponding to sequential
/// portions of audio.
core.List<SpeechRecognitionResult> results;
RecognizeResponse();
RecognizeResponse.fromJson(core.Map _json) {
if (_json.containsKey('results')) {
results = (_json['results'] as core.List)
.map<SpeechRecognitionResult>((value) =>
SpeechRecognitionResult.fromJson(
value as core.Map<core.String, core.dynamic>))
.toList();
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (results != null) {
_json['results'] = results.map((value) => value.toJson()).toList();
}
return _json;
}
}
/// Config to enable speaker diarization.
class SpeakerDiarizationConfig {
/// If 'true', enables speaker detection for each recognized word in the top
/// alternative of the recognition result using a speaker_tag provided in the
/// WordInfo.
core.bool enableSpeakerDiarization;
/// Maximum number of speakers in the conversation. This range gives you more
/// flexibility by allowing the system to automatically determine the correct
/// number of speakers. If not set, the default value is 6.
core.int maxSpeakerCount;
/// Minimum number of speakers in the conversation. This range gives you more
/// flexibility by allowing the system to automatically determine the correct
/// number of speakers. If not set, the default value is 2.
core.int minSpeakerCount;
/// Output only. Unused.
core.int speakerTag;
SpeakerDiarizationConfig();
SpeakerDiarizationConfig.fromJson(core.Map _json) {
if (_json.containsKey('enableSpeakerDiarization')) {
enableSpeakerDiarization = _json['enableSpeakerDiarization'] as core.bool;
}
if (_json.containsKey('maxSpeakerCount')) {
maxSpeakerCount = _json['maxSpeakerCount'] as core.int;
}
if (_json.containsKey('minSpeakerCount')) {
minSpeakerCount = _json['minSpeakerCount'] as core.int;
}
if (_json.containsKey('speakerTag')) {
speakerTag = _json['speakerTag'] as core.int;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (enableSpeakerDiarization != null) {
_json['enableSpeakerDiarization'] = enableSpeakerDiarization;
}
if (maxSpeakerCount != null) {
_json['maxSpeakerCount'] = maxSpeakerCount;
}
if (minSpeakerCount != null) {
_json['minSpeakerCount'] = minSpeakerCount;
}
if (speakerTag != null) {
_json['speakerTag'] = speakerTag;
}
return _json;
}
}
/// Provides "hints" to the speech recognizer to favor specific words and
/// phrases in the results.
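///
/// Illustrative construction sketch (the phrases are arbitrary examples;
/// `$MONTH` is written as a raw string so Dart does not treat it as
/// interpolation):
///
/// ```dart
/// final context = SpeechContext()
///   ..phrases = ['weather in Boston', r'$MONTH'];
/// final config = RecognitionConfig()
///   ..languageCode = 'en-US'
///   ..speechContexts = [context];
/// ```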
class SpeechContext {
/// A list of strings containing words and phrases "hints" so that the speech
/// recognition is more likely to recognize them. This can be used to improve
/// the accuracy for specific words and phrases, for example, if specific
/// commands are typically spoken by the user. This can also be used to add
/// additional words to the vocabulary of the recognizer. See [usage
/// limits](https://cloud.google.com/speech-to-text/quotas#content). List
/// items can also be set to classes for groups of words that represent common
/// concepts that occur in natural language. For example, rather than
/// providing phrase hints for every month of the year, using the $MONTH class
/// improves the likelihood of correctly transcribing audio that includes
/// months.
core.List<core.String> phrases;
SpeechContext();
SpeechContext.fromJson(core.Map _json) {
if (_json.containsKey('phrases')) {
phrases = (_json['phrases'] as core.List)
.map<core.String>((value) => value as core.String)
.toList();
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (phrases != null) {
_json['phrases'] = phrases;
}
return _json;
}
}
/// Alternative hypotheses (a.k.a. n-best list).
class SpeechRecognitionAlternative {
/// The confidence estimate between 0.0 and 1.0. A higher number indicates an
/// estimated greater likelihood that the recognized words are correct. This
  /// field is set only for the top alternative of a non-streaming result or of
/// a streaming result where `is_final=true`. This field is not guaranteed to
/// be accurate and users should not rely on it to be always provided. The
/// default of 0.0 is a sentinel value indicating `confidence` was not set.
core.double confidence;
/// Transcript text representing the words that the user spoke.
core.String transcript;
/// A list of word-specific information for each recognized word. Note: When
/// `enable_speaker_diarization` is true, you will see all the words from the
/// beginning of the audio.
core.List<WordInfo> words;
SpeechRecognitionAlternative();
SpeechRecognitionAlternative.fromJson(core.Map _json) {
if (_json.containsKey('confidence')) {
confidence = (_json['confidence'] as core.num).toDouble();
}
if (_json.containsKey('transcript')) {
transcript = _json['transcript'] as core.String;
}
if (_json.containsKey('words')) {
words = (_json['words'] as core.List)
.map<WordInfo>((value) =>
WordInfo.fromJson(value as core.Map<core.String, core.dynamic>))
.toList();
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (confidence != null) {
_json['confidence'] = confidence;
}
if (transcript != null) {
_json['transcript'] = transcript;
}
if (words != null) {
_json['words'] = words.map((value) => value.toJson()).toList();
}
return _json;
}
}
/// A speech recognition result corresponding to a portion of the audio.
class SpeechRecognitionResult {
/// May contain one or more recognition hypotheses (up to the maximum
/// specified in `max_alternatives`). These alternatives are ordered in terms
/// of accuracy, with the top (first) alternative being the most probable, as
/// ranked by the recognizer.
core.List<SpeechRecognitionAlternative> alternatives;
/// For multi-channel audio, this is the channel number corresponding to the
/// recognized result for the audio from that channel. For audio_channel_count
/// = N, its output values can range from '1' to 'N'.
core.int channelTag;
SpeechRecognitionResult();
SpeechRecognitionResult.fromJson(core.Map _json) {
if (_json.containsKey('alternatives')) {
alternatives = (_json['alternatives'] as core.List)
.map<SpeechRecognitionAlternative>((value) =>
SpeechRecognitionAlternative.fromJson(
value as core.Map<core.String, core.dynamic>))
.toList();
}
if (_json.containsKey('channelTag')) {
channelTag = _json['channelTag'] as core.int;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (alternatives != null) {
_json['alternatives'] =
alternatives.map((value) => value.toJson()).toList();
}
if (channelTag != null) {
_json['channelTag'] = channelTag;
}
return _json;
}
}
/// The `Status` type defines a logical error model that is suitable for
/// different programming environments, including REST APIs and RPC APIs. It is
/// used by [gRPC](https://github.com/grpc). Each `Status` message contains
/// three pieces of data: error code, error message, and error details. You can
/// find out more about this error model and how to work with it in the [API
/// Design Guide](https://cloud.google.com/apis/design/errors).
class Status {
/// The status code, which should be an enum value of google.rpc.Code.
core.int code;
/// A list of messages that carry the error details. There is a common set of
/// message types for APIs to use.
///
/// The values for Object must be JSON objects. It can consist of `num`,
/// `String`, `bool` and `null` as well as `Map` and `List` values.
core.List<core.Map<core.String, core.Object>> details;
/// A developer-facing error message, which should be in English. Any
/// user-facing error message should be localized and sent in the
/// google.rpc.Status.details field, or localized by the client.
core.String message;
Status();
Status.fromJson(core.Map _json) {
if (_json.containsKey('code')) {
code = _json['code'] as core.int;
}
if (_json.containsKey('details')) {
details = (_json['details'] as core.List)
.map<core.Map<core.String, core.Object>>((value) =>
(value as core.Map).cast<core.String, core.Object>().map(
(key, item) => core.MapEntry(
key,
item as core.Object,
),
))
.toList();
}
if (_json.containsKey('message')) {
message = _json['message'] as core.String;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (code != null) {
_json['code'] = code;
}
if (details != null) {
_json['details'] = details;
}
if (message != null) {
_json['message'] = message;
}
return _json;
}
}
/// Word-specific information for recognized words.
class WordInfo {
/// Time offset relative to the beginning of the audio, and corresponding to
/// the end of the spoken word. This field is only set if
/// `enable_word_time_offsets=true` and only in the top hypothesis. This is an
/// experimental feature and the accuracy of the time offset can vary.
core.String endTime;
/// Output only. A distinct integer value is assigned for every speaker within
/// the audio. This field specifies which one of those speakers was detected
/// to have spoken this word. Value ranges from '1' to
/// diarization_speaker_count. speaker_tag is set if
/// enable_speaker_diarization = 'true' and only in the top alternative.
core.int speakerTag;
/// Time offset relative to the beginning of the audio, and corresponding to
/// the start of the spoken word. This field is only set if
/// `enable_word_time_offsets=true` and only in the top hypothesis. This is an
/// experimental feature and the accuracy of the time offset can vary.
core.String startTime;
/// The word corresponding to this set of information.
core.String word;
WordInfo();
WordInfo.fromJson(core.Map _json) {
if (_json.containsKey('endTime')) {
endTime = _json['endTime'] as core.String;
}
if (_json.containsKey('speakerTag')) {
speakerTag = _json['speakerTag'] as core.int;
}
if (_json.containsKey('startTime')) {
startTime = _json['startTime'] as core.String;
}
if (_json.containsKey('word')) {
word = _json['word'] as core.String;
}
}
core.Map<core.String, core.Object> toJson() {
final _json = <core.String, core.Object>{};
if (endTime != null) {
_json['endTime'] = endTime;
}
if (speakerTag != null) {
_json['speakerTag'] = speakerTag;
}
if (startTime != null) {
_json['startTime'] = startTime;
}
if (word != null) {
_json['word'] = word;
}
return _json;
}
}