From f6135d75db79b1d48fad3a3b3080d37be20a2313 Mon Sep 17 00:00:00 2001 From: Atneya Nair Date: Mon, 12 Dec 2022 17:21:25 -0800 Subject: [PATCH] Add hotword stream configuration to AudioRecord In order to enable AGSA Natural Conversation, the AudioRecord API needs to be able to express opening streams that provide content from the hotword pipeline, and/or content which represents audio captured a short duration in the past. Test: atest AudioRecordTest, ST smoke test Bug: 237449755 Change-Id: I570d0fa6059a3fcdd9532b04a42a75acddbeabe3 --- core/api/system-current.txt | 4 + core/jni/android_media_AudioRecord.cpp | 10 +- media/java/android/media/AudioRecord.java | 130 +++++++++++++++++++--- 3 files changed, 124 insertions(+), 20 deletions(-) diff --git a/core/api/system-current.txt b/core/api/system-current.txt index 1eed2d4c0d6de..1317292fcd3b4 100644 --- a/core/api/system-current.txt +++ b/core/api/system-current.txt @@ -6714,12 +6714,16 @@ package android.media { public class AudioRecord implements android.media.AudioRecordingMonitor android.media.AudioRouting android.media.MicrophoneDirection { ctor @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public AudioRecord(android.media.AudioAttributes, android.media.AudioFormat, int, int) throws java.lang.IllegalArgumentException; method public static long getMaxSharedAudioHistoryMillis(); + method public boolean isHotwordLookbackStream(); + method public boolean isHotwordStream(); method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.MediaSyncEvent shareAudioHistory(@NonNull String, @IntRange(from=0) long); } public static class AudioRecord.Builder { method public android.media.AudioRecord.Builder setAudioAttributes(@NonNull android.media.AudioAttributes) throws java.lang.IllegalArgumentException; method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder 
setMaxSharedAudioHistoryMillis(long) throws java.lang.IllegalArgumentException; + method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder setRequestHotwordLookbackStream(boolean); + method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder setRequestHotwordStream(boolean); method public android.media.AudioRecord.Builder setSessionId(int) throws java.lang.IllegalArgumentException; method @NonNull public android.media.AudioRecord.Builder setSharedAudioEvent(@NonNull android.media.MediaSyncEvent) throws java.lang.IllegalArgumentException; } diff --git a/core/jni/android_media_AudioRecord.cpp b/core/jni/android_media_AudioRecord.cpp index 30333d20d7759..19103319671fc 100644 --- a/core/jni/android_media_AudioRecord.cpp +++ b/core/jni/android_media_AudioRecord.cpp @@ -149,7 +149,8 @@ static jint android_media_AudioRecord_setup(JNIEnv *env, jobject thiz, jobject w jint channelIndexMask, jint audioFormat, jint buffSizeInBytes, jintArray jSession, jobject jAttributionSource, jlong nativeRecordInJavaObj, - jint sharedAudioHistoryMs) { + jint sharedAudioHistoryMs, + jint halFlags) { //ALOGV(">> Entering android_media_AudioRecord_setup"); //ALOGV("sampleRate=%d, audioFormat=%d, channel mask=%x, buffSizeInBytes=%d " // "nativeRecordInJavaObj=0x%llX", @@ -239,10 +240,7 @@ static jint android_media_AudioRecord_setup(JNIEnv *env, jobject thiz, jobject w } ALOGV("AudioRecord_setup for source=%d tags=%s flags=%08x", paa->source, paa->tags, paa->flags); - audio_input_flags_t flags = AUDIO_INPUT_FLAG_NONE; - if (paa->flags & AUDIO_FLAG_HW_HOTWORD) { - flags = AUDIO_INPUT_FLAG_HW_HOTWORD; - } + const auto flags = static_cast(halFlags); // create the callback information: // this data will be passed with every AudioRecord callback // we use a weak reference so the AudioRecord object can be garbage collected. 
@@ -831,7 +829,7 @@ static const JNINativeMethod gMethods[] = { {"native_start", "(II)I", (void *)android_media_AudioRecord_start}, {"native_stop", "()V", (void *)android_media_AudioRecord_stop}, {"native_setup", - "(Ljava/lang/Object;Ljava/lang/Object;[IIIII[ILandroid/os/Parcel;JI)I", + "(Ljava/lang/Object;Ljava/lang/Object;[IIIII[ILandroid/os/Parcel;JII)I", (void *)android_media_AudioRecord_setup}, {"native_finalize", "()V", (void *)android_media_AudioRecord_finalize}, {"native_release", "()V", (void *)android_media_AudioRecord_release}, diff --git a/media/java/android/media/AudioRecord.java b/media/java/android/media/AudioRecord.java index 6e3829a040a57..b6ab262b60c58 100644 --- a/media/java/android/media/AudioRecord.java +++ b/media/java/android/media/AudioRecord.java @@ -37,6 +37,7 @@ import android.content.AttributionSource.ScopedParcelState; import android.content.Context; import android.media.MediaRecorder.Source; +import android.media.audio.common.AudioInputFlags; import android.media.audiopolicy.AudioMix; import android.media.audiopolicy.AudioMixingRule; import android.media.audiopolicy.AudioPolicy; @@ -275,6 +276,11 @@ public class AudioRecord implements AudioRouting, MicrophoneDirection, * Audio session ID */ private int mSessionId = AudioManager.AUDIO_SESSION_ID_GENERATE; + /** + * Audio HAL Input Flags as bitfield. + */ + private int mHalInputFlags = 0; + /** * AudioAttributes */ @@ -360,7 +366,8 @@ public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int a public AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSizeInBytes, int sessionId) throws IllegalArgumentException { this(attributes, format, bufferSizeInBytes, sessionId, - ActivityThread.currentApplication(), 0 /*maxSharedAudioHistoryMs*/); + ActivityThread.currentApplication(), + 0 /*maxSharedAudioHistoryMs*/, 0 /* halInputFlags */); } /** @@ -382,14 +389,15 @@ public AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSiz * time. 
See also {@link AudioManager#generateAudioSessionId()} to obtain a session ID before * construction. * @param context An optional context on whose behalf the recoding is performed. - * + * @param maxSharedAudioHistoryMs + * @param halInputFlags Bitfield indexed by {@link AudioInputFlags} which is passed to the HAL. * @throws IllegalArgumentException */ private AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSizeInBytes, int sessionId, @Nullable Context context, - int maxSharedAudioHistoryMs) throws IllegalArgumentException { + int maxSharedAudioHistoryMs, int halInputFlags) throws IllegalArgumentException { mRecordingState = RECORDSTATE_STOPPED; - + mHalInputFlags = halInputFlags; if (attributes == null) { throw new IllegalArgumentException("Illegal null AudioAttributes"); } @@ -469,7 +477,7 @@ private AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSi int initResult = native_setup(new WeakReference(this), mAudioAttributes, sampleRate, mChannelMask, mChannelIndexMask, mAudioFormat, mNativeBufferSizeInBytes, session, attributionSourceState.getParcel(), - 0 /*nativeRecordInJavaObj*/, maxSharedAudioHistoryMs); + 0 /*nativeRecordInJavaObj*/, maxSharedAudioHistoryMs, mHalInputFlags); if (initResult != SUCCESS) { loge("Error code " + initResult + " when initializing native AudioRecord object."); return; // with mState == STATE_UNINITIALIZED @@ -535,7 +543,8 @@ private void unregisterAudioPolicyOnRelease(AudioPolicy audioPolicy) { session, attributionSourceState.getParcel(), nativeRecordInJavaObj, - 0); + 0 /*maxSharedAudioHistoryMs*/, + 0 /*halInputFlags*/); } if (initResult != SUCCESS) { loge("Error code "+initResult+" when initializing native AudioRecord object."); @@ -597,6 +606,8 @@ public static class Builder { private int mPrivacySensitive = PRIVACY_SENSITIVE_DEFAULT; private int mMaxSharedAudioHistoryMs = 0; private int mCallRedirectionMode = AudioManager.CALL_REDIRECT_NONE; + private boolean mIsHotwordStream = false; + 
private boolean mIsHotwordLookback = false; private static final int PRIVACY_SENSITIVE_DEFAULT = -1; private static final int PRIVACY_SENSITIVE_DISABLED = 0; @@ -905,18 +916,74 @@ public Builder setSessionId(int sessionId) throws IllegalArgumentException { return this; } + /** + * @hide + * Set to indicate that the requested AudioRecord object should produce the same type + * of audio content that the hotword recognition model consumes. SoundTrigger hotword + * recognition will not be disrupted. The source in the set AudioAttributes and the set + * audio source will be overridden if this API is used. + *
Use {@link AudioManager#isHotwordStreamSupported(boolean)} to query support. + * @param hotwordContent true if AudioRecord should produce content captured from the + * hotword pipeline. false if AudioRecord should produce content captured outside + * the hotword pipeline. + * @return the same Builder instance. + **/ + @SystemApi + @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) + public @NonNull Builder setRequestHotwordStream(boolean hotwordContent) { + mIsHotwordStream = hotwordContent; + return this; + } + + /** + * @hide + * Set to indicate that the requested AudioRecord object should produce the same type + * of audio content that the hotword recognition model consumes and that the stream will + * be able to provide buffered audio content from an unspecified duration prior to stream + * open. The source in the set AudioAttributes and the set audio source will be overridden + * if this API is used. + *
Use {@link AudioManager#isHotwordStreamSupported(boolean)} to query support. + *
If this is set, {@link AudioRecord.Builder#setRequestHotwordStream(boolean)} + * must not be set, or {@link AudioRecord.Builder#build()} will throw. + * @param hotwordLookbackContent true if AudioRecord should produce content captured from + * the hotword pipeline, including content captured prior to stream open. false if + * AudioRecord should not capture such content, i.e. if no content from prior + * to stream open is requested. + * @return the same Builder instance. + **/ + @SystemApi + @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) + public @NonNull Builder setRequestHotwordLookbackStream(boolean hotwordLookbackContent) { + mIsHotwordLookback = hotwordLookbackContent; + return this; + } + + /** * @return a new {@link AudioRecord} instance successfully initialized with all * the parameters set on this Builder. * @throws UnsupportedOperationException if the parameters set on the Builder - * were incompatible, or if they are not supported by the device, - * or if the device was not available. + * were incompatible, if the parameters are not supported by the device, if the caller + * does not hold the appropriate permissions, or if the device was not available. 
*/ @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public AudioRecord build() throws UnsupportedOperationException { if (mAudioPlaybackCaptureConfiguration != null) { return buildAudioPlaybackCaptureRecord(); } + int halInputFlags = 0; + if (mIsHotwordStream) { + if (mIsHotwordLookback) { + throw new UnsupportedOperationException( + "setRequestHotwordLookbackStream and " + + "setRequestHotwordStream used concurrently"); + } else { + halInputFlags = (1 << AudioInputFlags.HOTWORD_TAP); + } + } else if (mIsHotwordLookback) { + halInputFlags = (1 << AudioInputFlags.HOTWORD_TAP) | + (1 << AudioInputFlags.HW_LOOKBACK); + } if (mFormat == null) { mFormat = new AudioFormat.Builder() @@ -942,6 +1009,12 @@ public AudioRecord build() throws UnsupportedOperationException { .build(); } + if (mIsHotwordStream || mIsHotwordLookback) { + mAttributes = new AudioAttributes.Builder(mAttributes) + .setInternalCapturePreset(MediaRecorder.AudioSource.VOICE_RECOGNITION) + .build(); + } + // If mPrivacySensitive is default, the privacy flag is already set // according to audio source in audio attributes. 
if (mPrivacySensitive != PRIVACY_SENSITIVE_DEFAULT) { @@ -980,7 +1053,7 @@ public AudioRecord build() throws UnsupportedOperationException { } final AudioRecord record = new AudioRecord( mAttributes, mFormat, mBufferSizeInBytes, mSessionId, mContext, - mMaxSharedAudioHistoryMs); + mMaxSharedAudioHistoryMs, halInputFlags); if (record.getState() == STATE_UNINITIALIZED) { // release is not necessary throw new UnsupportedOperationException("Cannot create AudioRecord"); @@ -1041,8 +1114,8 @@ private static int getChannelMaskFromLegacyConfig(int inChannelConfig, case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT case AudioFormat.CHANNEL_IN_MONO: case AudioFormat.CHANNEL_CONFIGURATION_MONO: - mask = AudioFormat.CHANNEL_IN_MONO; - break; + mask = AudioFormat.CHANNEL_IN_MONO; + break; case AudioFormat.CHANNEL_IN_STEREO: case AudioFormat.CHANNEL_CONFIGURATION_STEREO: mask = AudioFormat.CHANNEL_IN_STEREO; @@ -1384,6 +1457,35 @@ public boolean isPrivacySensitive() { return (mAudioAttributes.getAllFlags() & AudioAttributes.FLAG_CAPTURE_PRIVATE) != 0; } + /** + * @hide + * Returns whether the AudioRecord object produces the same type of audio content that + * the hotword recognition model consumes. + *
If {@link #isHotwordLookbackStream()} is true, this will return false. + *
See {@link Builder#setRequestHotwordStream(boolean)} + * @return true if AudioRecord produces hotword content, false otherwise + **/ + @SystemApi + public boolean isHotwordStream() { + return ((mHalInputFlags & (1 << AudioInputFlags.HOTWORD_TAP)) != 0 && + (mHalInputFlags & (1 << AudioInputFlags.HW_LOOKBACK)) == 0); + } + + /** + * @hide + * Returns whether the AudioRecord object produces the same type of audio content that + * the hotword recognition model consumes, and includes capture content from prior to + * stream open. + *
See {@link Builder#setRequestHotwordLookbackStream(boolean)} + * @return true if AudioRecord produces hotword capture content from + * prior to stream open, false otherwise + **/ + @SystemApi + public boolean isHotwordLookbackStream() { + return ((mHalInputFlags & (1 << AudioInputFlags.HW_LOOKBACK)) != 0); + } + + //--------------------------------------------------------- // Transport control methods //-------------------- @@ -2346,13 +2448,13 @@ private int native_setup(Object audiorecordThis, Object /*AudioAttributes*/ attributes, int[] sampleRate, int channelMask, int channelIndexMask, int audioFormat, int buffSizeInBytes, int[] sessionId, String opPackageName, - long nativeRecordInJavaObj) { + long nativeRecordInJavaObj, int halInputFlags) { AttributionSource attributionSource = AttributionSource.myAttributionSource() .withPackageName(opPackageName); try (ScopedParcelState attributionSourceState = attributionSource.asScopedParcelState()) { return native_setup(audiorecordThis, attributes, sampleRate, channelMask, channelIndexMask, audioFormat, buffSizeInBytes, sessionId, - attributionSourceState.getParcel(), nativeRecordInJavaObj, 0); + attributionSourceState.getParcel(), nativeRecordInJavaObj, 0, halInputFlags); } } @@ -2360,7 +2462,7 @@ private native int native_setup(Object audiorecordThis, Object /*AudioAttributes*/ attributes, int[] sampleRate, int channelMask, int channelIndexMask, int audioFormat, int buffSizeInBytes, int[] sessionId, @NonNull Parcel attributionSource, - long nativeRecordInJavaObj, int maxSharedAudioHistoryMs); + long nativeRecordInJavaObj, int maxSharedAudioHistoryMs, int halInputFlags); // TODO remove: implementation calls directly into implementation of native_release() private native void native_finalize();