Skip to content
This repository has been archived by the owner on Nov 8, 2023. It is now read-only.

Commit

Permalink
Add hotword stream configuration to AudioRecord
Browse files Browse the repository at this point in the history
In order to enable AGSA Natural Conversation, the AudioRecord API
needs to be able to express opening streams that provide content
from the hotword pipeline, and/or content which represents audio
captured a short duration in the past.

Test: atest AudioRecordTest, ST smoke test
Bug: 237449755
Change-Id: I570d0fa6059a3fcdd9532b04a42a75acddbeabe3
  • Loading branch information
atneya committed Dec 22, 2022
1 parent 41993ac commit f6135d7
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 20 deletions.
4 changes: 4 additions & 0 deletions core/api/system-current.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6714,12 +6714,16 @@ package android.media {
public class AudioRecord implements android.media.AudioRecordingMonitor android.media.AudioRouting android.media.MicrophoneDirection {
ctor @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public AudioRecord(android.media.AudioAttributes, android.media.AudioFormat, int, int) throws java.lang.IllegalArgumentException;
method public static long getMaxSharedAudioHistoryMillis();
method public boolean isHotwordLookbackStream();
method public boolean isHotwordStream();
method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.MediaSyncEvent shareAudioHistory(@NonNull String, @IntRange(from=0) long);
}

public static class AudioRecord.Builder {
method public android.media.AudioRecord.Builder setAudioAttributes(@NonNull android.media.AudioAttributes) throws java.lang.IllegalArgumentException;
method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder setMaxSharedAudioHistoryMillis(long) throws java.lang.IllegalArgumentException;
method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder setRequestHotwordLookbackStream(boolean);
method @NonNull @RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD) public android.media.AudioRecord.Builder setRequestHotwordStream(boolean);
method public android.media.AudioRecord.Builder setSessionId(int) throws java.lang.IllegalArgumentException;
method @NonNull public android.media.AudioRecord.Builder setSharedAudioEvent(@NonNull android.media.MediaSyncEvent) throws java.lang.IllegalArgumentException;
}
Expand Down
10 changes: 4 additions & 6 deletions core/jni/android_media_AudioRecord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ static jint android_media_AudioRecord_setup(JNIEnv *env, jobject thiz, jobject w
jint channelIndexMask, jint audioFormat,
jint buffSizeInBytes, jintArray jSession,
jobject jAttributionSource, jlong nativeRecordInJavaObj,
jint sharedAudioHistoryMs) {
jint sharedAudioHistoryMs,
jint halFlags) {
//ALOGV(">> Entering android_media_AudioRecord_setup");
//ALOGV("sampleRate=%d, audioFormat=%d, channel mask=%x, buffSizeInBytes=%d "
// "nativeRecordInJavaObj=0x%llX",
Expand Down Expand Up @@ -239,10 +240,7 @@ static jint android_media_AudioRecord_setup(JNIEnv *env, jobject thiz, jobject w
}
ALOGV("AudioRecord_setup for source=%d tags=%s flags=%08x", paa->source, paa->tags, paa->flags);

audio_input_flags_t flags = AUDIO_INPUT_FLAG_NONE;
if (paa->flags & AUDIO_FLAG_HW_HOTWORD) {
flags = AUDIO_INPUT_FLAG_HW_HOTWORD;
}
const auto flags = static_cast<audio_input_flags_t>(halFlags);
// create the callback information:
// this data will be passed with every AudioRecord callback
// we use a weak reference so the AudioRecord object can be garbage collected.
Expand Down Expand Up @@ -831,7 +829,7 @@ static const JNINativeMethod gMethods[] = {
{"native_start", "(II)I", (void *)android_media_AudioRecord_start},
{"native_stop", "()V", (void *)android_media_AudioRecord_stop},
{"native_setup",
"(Ljava/lang/Object;Ljava/lang/Object;[IIIII[ILandroid/os/Parcel;JI)I",
"(Ljava/lang/Object;Ljava/lang/Object;[IIIII[ILandroid/os/Parcel;JII)I",
(void *)android_media_AudioRecord_setup},
{"native_finalize", "()V", (void *)android_media_AudioRecord_finalize},
{"native_release", "()V", (void *)android_media_AudioRecord_release},
Expand Down
130 changes: 116 additions & 14 deletions media/java/android/media/AudioRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import android.content.AttributionSource.ScopedParcelState;
import android.content.Context;
import android.media.MediaRecorder.Source;
import android.media.audio.common.AudioInputFlags;
import android.media.audiopolicy.AudioMix;
import android.media.audiopolicy.AudioMixingRule;
import android.media.audiopolicy.AudioPolicy;
Expand Down Expand Up @@ -275,6 +276,11 @@ public class AudioRecord implements AudioRouting, MicrophoneDirection,
* Audio session ID
*/
private int mSessionId = AudioManager.AUDIO_SESSION_ID_GENERATE;
/**
* Audio HAL Input Flags as bitfield.
*/
private int mHalInputFlags = 0;

/**
* AudioAttributes
*/
Expand Down Expand Up @@ -360,7 +366,8 @@ public AudioRecord(int audioSource, int sampleRateInHz, int channelConfig, int a
public AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSizeInBytes,
int sessionId) throws IllegalArgumentException {
this(attributes, format, bufferSizeInBytes, sessionId,
ActivityThread.currentApplication(), 0 /*maxSharedAudioHistoryMs*/);
ActivityThread.currentApplication(),
0 /*maxSharedAudioHistoryMs*/, 0 /* halInputFlags */);
}

/**
Expand All @@ -382,14 +389,15 @@ public AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSiz
* time. See also {@link AudioManager#generateAudioSessionId()} to obtain a session ID before
* construction.
* @param context An optional context on whose behalf the recording is performed.
*
* @param maxSharedAudioHistoryMs
* @param halInputFlags Bitfield indexed by {@link AudioInputFlags} which is passed to the HAL.
* @throws IllegalArgumentException
*/
private AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSizeInBytes,
int sessionId, @Nullable Context context,
int maxSharedAudioHistoryMs) throws IllegalArgumentException {
int maxSharedAudioHistoryMs, int halInputFlags) throws IllegalArgumentException {
mRecordingState = RECORDSTATE_STOPPED;

mHalInputFlags = halInputFlags;
if (attributes == null) {
throw new IllegalArgumentException("Illegal null AudioAttributes");
}
Expand Down Expand Up @@ -469,7 +477,7 @@ private AudioRecord(AudioAttributes attributes, AudioFormat format, int bufferSi
int initResult = native_setup(new WeakReference<AudioRecord>(this), mAudioAttributes,
sampleRate, mChannelMask, mChannelIndexMask, mAudioFormat,
mNativeBufferSizeInBytes, session, attributionSourceState.getParcel(),
0 /*nativeRecordInJavaObj*/, maxSharedAudioHistoryMs);
0 /*nativeRecordInJavaObj*/, maxSharedAudioHistoryMs, mHalInputFlags);
if (initResult != SUCCESS) {
loge("Error code " + initResult + " when initializing native AudioRecord object.");
return; // with mState == STATE_UNINITIALIZED
Expand Down Expand Up @@ -535,7 +543,8 @@ private void unregisterAudioPolicyOnRelease(AudioPolicy audioPolicy) {
session,
attributionSourceState.getParcel(),
nativeRecordInJavaObj,
0);
0 /*maxSharedAudioHistoryMs*/,
0 /*halInputFlags*/);
}
if (initResult != SUCCESS) {
loge("Error code "+initResult+" when initializing native AudioRecord object.");
Expand Down Expand Up @@ -597,6 +606,8 @@ public static class Builder {
private int mPrivacySensitive = PRIVACY_SENSITIVE_DEFAULT;
private int mMaxSharedAudioHistoryMs = 0;
private int mCallRedirectionMode = AudioManager.CALL_REDIRECT_NONE;
private boolean mIsHotwordStream = false;
private boolean mIsHotwordLookback = false;

private static final int PRIVACY_SENSITIVE_DEFAULT = -1;
private static final int PRIVACY_SENSITIVE_DISABLED = 0;
Expand Down Expand Up @@ -905,18 +916,74 @@ public Builder setSessionId(int sessionId) throws IllegalArgumentException {
return this;
}

/**
 * Requests that the built AudioRecord produce the same type of audio content that the
 * hotword recognition model consumes. SoundTrigger hotword recognition will not be
 * disrupted.
 * <br> When this is enabled, {@link #build()} overrides the source in the set
 * AudioAttributes and the set audio source.
 * <br> Use {@link AudioManager#isHotwordStreamSupported(boolean)} to query support.
 * <br> This must not be enabled together with
 * {@link #setRequestHotwordLookbackStream(boolean)}, or {@link #build()} will throw.
 * @param hotwordContent true if AudioRecord should produce content captured from the
 *     hotword pipeline, false if AudioRecord should produce content captured outside
 *     the hotword pipeline.
 * @return the same Builder instance.
 * @hide
 */
@SystemApi
@RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD)
public @NonNull Builder setRequestHotwordStream(boolean hotwordContent) {
    // Only recorded here; the HAL input flags are derived from it in build().
    this.mIsHotwordStream = hotwordContent;
    return this;
}

/**
 * Requests that the built AudioRecord produce the same type of audio content that the
 * hotword recognition model consumes, and that the stream be able to provide buffered
 * audio content from an unspecified duration prior to stream open.
 * <br> When this is enabled, {@link #build()} overrides the source in the set
 * AudioAttributes and the set audio source.
 * <br> Use {@link AudioManager#isHotwordStreamSupported(boolean)} to query support.
 * <br> If this is set, {@link AudioRecord.Builder#setRequestHotwordStream(boolean)}
 * must not be set, or {@link AudioRecord.Builder#build()} will throw.
 * @param hotwordLookbackContent true if AudioRecord should produce content captured from
 *     the hotword pipeline, including content captured prior to stream open. false if
 *     AudioRecord should not capture such content.
 * @return the same Builder instance.
 * @hide
 */
@SystemApi
@RequiresPermission(android.Manifest.permission.CAPTURE_AUDIO_HOTWORD)
public @NonNull Builder setRequestHotwordLookbackStream(boolean hotwordLookbackContent) {
    // Only recorded here; the HAL input flags are derived from it in build().
    this.mIsHotwordLookback = hotwordLookbackContent;
    return this;
}


/**
* @return a new {@link AudioRecord} instance successfully initialized with all
* the parameters set on this <code>Builder</code>.
* @throws UnsupportedOperationException if the parameters set on the <code>Builder</code>
* were incompatible, or if they are not supported by the device,
* or if the device was not available.
* were incompatible, if the parameters are not supported by the device, if the caller
* does not hold the appropriate permissions, or if the device was not available.
*/
@RequiresPermission(android.Manifest.permission.RECORD_AUDIO)
public AudioRecord build() throws UnsupportedOperationException {
if (mAudioPlaybackCaptureConfiguration != null) {
return buildAudioPlaybackCaptureRecord();
}
int halInputFlags = 0;
if (mIsHotwordStream) {
if (mIsHotwordLookback) {
throw new UnsupportedOperationException(
"setRequestHotwordLookbackStream and " +
"setRequestHotwordStream used concurrently");
} else {
halInputFlags = (1 << AudioInputFlags.HOTWORD_TAP);
}
} else if (mIsHotwordLookback) {
halInputFlags = (1 << AudioInputFlags.HOTWORD_TAP) |
(1 << AudioInputFlags.HW_LOOKBACK);
}

if (mFormat == null) {
mFormat = new AudioFormat.Builder()
Expand All @@ -942,6 +1009,12 @@ public AudioRecord build() throws UnsupportedOperationException {
.build();
}

if (mIsHotwordStream || mIsHotwordLookback) {
mAttributes = new AudioAttributes.Builder(mAttributes)
.setInternalCapturePreset(MediaRecorder.AudioSource.VOICE_RECOGNITION)
.build();
}

// If mPrivacySensitive is default, the privacy flag is already set
// according to audio source in audio attributes.
if (mPrivacySensitive != PRIVACY_SENSITIVE_DEFAULT) {
Expand Down Expand Up @@ -980,7 +1053,7 @@ public AudioRecord build() throws UnsupportedOperationException {
}
final AudioRecord record = new AudioRecord(
mAttributes, mFormat, mBufferSizeInBytes, mSessionId, mContext,
mMaxSharedAudioHistoryMs);
mMaxSharedAudioHistoryMs, halInputFlags);
if (record.getState() == STATE_UNINITIALIZED) {
// release is not necessary
throw new UnsupportedOperationException("Cannot create AudioRecord");
Expand Down Expand Up @@ -1041,8 +1114,8 @@ private static int getChannelMaskFromLegacyConfig(int inChannelConfig,
case AudioFormat.CHANNEL_IN_DEFAULT: // AudioFormat.CHANNEL_CONFIGURATION_DEFAULT
case AudioFormat.CHANNEL_IN_MONO:
case AudioFormat.CHANNEL_CONFIGURATION_MONO:
mask = AudioFormat.CHANNEL_IN_MONO;
break;
mask = AudioFormat.CHANNEL_IN_MONO;
break;
case AudioFormat.CHANNEL_IN_STEREO:
case AudioFormat.CHANNEL_CONFIGURATION_STEREO:
mask = AudioFormat.CHANNEL_IN_STEREO;
Expand Down Expand Up @@ -1384,6 +1457,35 @@ public boolean isPrivacySensitive() {
return (mAudioAttributes.getAllFlags() & AudioAttributes.FLAG_CAPTURE_PRIVATE) != 0;
}

/**
 * Returns whether the AudioRecord object produces the same type of audio content that
 * the hotword recognition model consumes.
 * <br> If {@link #isHotwordLookbackStream()} is true, this will return false.
 * <br> See {@link Builder#setRequestHotwordStream(boolean)}
 * @return true if AudioRecord produces hotword content, false otherwise
 * @hide
 */
@SystemApi
public boolean isHotwordStream() {
    final int hotwordTapBit = 1 << AudioInputFlags.HOTWORD_TAP;
    final int lookbackBit = 1 << AudioInputFlags.HW_LOOKBACK;
    // A lookback stream is reported by isHotwordLookbackStream() only.
    if ((mHalInputFlags & lookbackBit) != 0) {
        return false;
    }
    return (mHalInputFlags & hotwordTapBit) != 0;
}

/**
 * Returns whether the AudioRecord object produces the same type of audio content that
 * the hotword recognition model consumes, and includes capture content from prior to
 * stream open.
 * <br> See {@link Builder#setRequestHotwordLookbackStream(boolean)}
 * @return true if AudioRecord produces hotword capture content from
 *     prior to stream open, false otherwise
 * @hide
 */
@SystemApi
public boolean isHotwordLookbackStream() {
    // Only the lookback bit of the HAL input flags is consulted here.
    final int lookbackBit = 1 << AudioInputFlags.HW_LOOKBACK;
    return (mHalInputFlags & lookbackBit) != 0;
}


//---------------------------------------------------------
// Transport control methods
//--------------------
Expand Down Expand Up @@ -2346,21 +2448,21 @@ private int native_setup(Object audiorecordThis,
Object /*AudioAttributes*/ attributes,
int[] sampleRate, int channelMask, int channelIndexMask, int audioFormat,
int buffSizeInBytes, int[] sessionId, String opPackageName,
long nativeRecordInJavaObj) {
long nativeRecordInJavaObj, int halInputFlags) {
AttributionSource attributionSource = AttributionSource.myAttributionSource()
.withPackageName(opPackageName);
try (ScopedParcelState attributionSourceState = attributionSource.asScopedParcelState()) {
return native_setup(audiorecordThis, attributes, sampleRate, channelMask,
channelIndexMask, audioFormat, buffSizeInBytes, sessionId,
attributionSourceState.getParcel(), nativeRecordInJavaObj, 0);
attributionSourceState.getParcel(), nativeRecordInJavaObj, 0, halInputFlags);
}
}

private native int native_setup(Object audiorecordThis,
Object /*AudioAttributes*/ attributes,
int[] sampleRate, int channelMask, int channelIndexMask, int audioFormat,
int buffSizeInBytes, int[] sessionId, @NonNull Parcel attributionSource,
long nativeRecordInJavaObj, int maxSharedAudioHistoryMs);
long nativeRecordInJavaObj, int maxSharedAudioHistoryMs, int halInputFlags);

// TODO remove: implementation calls directly into implementation of native_release()
private native void native_finalize();
Expand Down

0 comments on commit f6135d7

Please sign in to comment.