minimal-examples/secure-streams/minimal-secure-streams-alexa/audio.c - platform/external/libwebsockets - Git at Google

 /*
  * alsa audio handling
  *
  * Written in 2010-2020 by Andy Green <[email protected]>
  *
  * This file is made available under the Creative Commons CC0 1.0
  * Universal Public Domain Dedication.
  */

 #include <libwebsockets.h>
 #include <string.h>
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>

 #include <alsa/asoundlib.h>
 #include <pv_porcupine.h>

 #include <mpg123.h>

 #include "private.h"

 /* Secure Streams handles that are defined outside this file */
 extern struct lws_ss_handle *hss_avs_event;
 extern struct lws_ss_handle *hss_avs_sync;

 /* called here when the wakeword is seen; a 0 return is treated as success */
 int avs_query_start(struct lws_context *context);

 /*
  * Audio state machine.  MODE_IDLE has to remain 0, since the per-vhost state
  * holding the mode is zero-allocated and must start out idle.
  */
 enum {
 	MODE_IDLE	= 0,	/* feeding captured audio to the wakeword engine */
 	MODE_CAPTURING	= 1,	/* recording the utterance for upload */
 	MODE_PLAYING	= 2	/* decoding an mp3 to the playback channel */
 };

 /*
  * State for the audio protocol, zero-allocated as the vhost's protocol
  * private data at LWS_CALLBACK_PROTOCOL_INIT.
  */
 struct raw_vhd {
 	/*
 	 * Capture ring buffer (about 500ms at 16kHz 16-bit PCM).  The playback
 	 * path also borrows it as scratch space for decoded mp3 samples.
 	 */
 	int16_t p[8 * 1024];
 	pv_porcupine_object_t *porc;	/* wakeword engine instance */
 	snd_pcm_t *pcm_capture;		/* ALSA capture channel */
 	snd_pcm_t *pcm_playback;	/* ALSA playback channel */
 	snd_pcm_hw_params_t *params;	/* not referenced by the code visible here */
 	snd_pcm_uframes_t frames;	/* not referenced by the code visible here */
 	int16_t *porcbuf;		/* one wakeword frame, porc_spf samples */

 	mpg123_handle *mh;		/* decoder handed over via play_mp3() */

 	mp3_done_cb done_cb;		/* called once playback is torn down */
 	void *opaque;			/* argument passed to done_cb */

 	int mode;			/* one of MODE_... */
 	int rate;			/* sample rate from pv_sample_rate() */

 	int porc_spf;			/* samples per wakeword engine frame */
 	int filefd;			/* not referenced by the code visible here */
 	int rpos;			/* not referenced by the code visible here */
 	int wpos;			/* ring index the next capture read lands at */
 	int porcpos;			/* ring index the wakeword engine is up to */
 	int npos;			/* ring index spool_capture() is up to */
 	int times;			/* not referenced by the code visible here */
 	int quietcount;			/* run of low-energy samples while capturing */
 	int anycount;			/* sample count tested against the 1.5s grace */

 	int wplay;			/* not referenced by the code visible here */
 	int rplay;			/* not referenced by the code visible here */

 	char last_wake_detect;		/* wakeword result for the previous frame */
 	char destroy_mh_on_drain;	/* play_mp3(NULL, ...) asked for teardown */
 };

 /* the one instance, so the helpers called from alexa.c can reach the state */
 static struct raw_vhd *avhd;

 /*
  * Called from alexa.c to grab the next chunk of the audio capture ring
  * buffer for upload.
  *
  * buf: destination for the 16-bit samples, in native byte order
  * len: capacity of buf in bytes (an odd trailing byte is never written)
  *
  * Returns the number of bytes copied (0 if nothing new is buffered), or -1
  * if the audio protocol is not initialized or we are not in capture mode.
  */

 int
 spool_capture(uint8_t *buf, size_t len)
 {
 	size_t s, os;

 	if (!avhd || avhd->mode != MODE_CAPTURING)
 		return -1;

 	os = s = len / 2;

 	while (s && avhd->wpos != avhd->npos) {
 		/* buf carries no alignment guarantee, so don't store via int16_t * */
 		memcpy(buf, &avhd->p[avhd->npos], sizeof(avhd->p[0]));
 		buf += sizeof(avhd->p[0]);
 		avhd->npos = (int)(((size_t)avhd->npos + 1) %
 					LWS_ARRAY_SIZE(avhd->p));
 		s--;
 	}

 	lwsl_info("Copied %d samples (%d %d)\n", (int)(os - s),
 			avhd->wpos, avhd->npos);

 	return (int)((os - s) * 2);
 }

 /*
  * Called from alexa.c to control when the mp3 playback should begin and end.
  *
  * With a non-NULL mh, playback starts from that decoder (cb and opaque are
  * not used on this path).  With a NULL mh, the current decoder is marked to
  * be closed and deleted once it has no more output, after which cb(opaque)
  * is called.
  *
  * Returns 0, or -1 if the audio protocol has not been initialized.
  */

 int
 play_mp3(mpg123_handle *mh, mp3_done_cb cb, void *opaque)
 {
 	int n;

 	if (!avhd)
 		return -1;

 	if (mh) {
 		avhd->mh = mh;
 		avhd->mode = MODE_PLAYING;
 		n = snd_pcm_prepare(avhd->pcm_playback);
 		if (n < 0)
 			/* the decoder is already adopted, so just report it */
 			lwsl_warn("%s: snd_pcm_prepare: %s\n", __func__,
 				  snd_strerror(n));

 		return 0;
 	}

 	avhd->destroy_mh_on_drain = 1;
 	avhd->done_cb = cb;
 	avhd->opaque = opaque;

 	return 0;
 }

 /*
  * Helper used to open the default alsa device for one direction, adopt its
  * poll descriptor into lws on the "lws-audio-test" protocol, and set the
  * hwparams (interleaved access, S16_LE, mono, nearest rate to the wakeword
  * engine's sample rate).
  *
  * vh:   vhost the descriptor is adopted on
  * pcm:  receives the opened pcm handle
  * type: SND_PCM_STREAM_PLAYBACK or SND_PCM_STREAM_CAPTURE
  *
  * Returns 0 on success or -1 after logging the failure.  *pcm is left open
  * on failure once snd_pcm_open() succeeded.
  */

 static int
 set_hw_params(struct lws_vhost *vh, snd_pcm_t **pcm, int type)
 {
 	const char *dir = type == SND_PCM_STREAM_PLAYBACK ?
 						"playback" : "capture";
 	unsigned int rate = pv_sample_rate(); /* it's 16kHz */
 	snd_pcm_hw_params_t *params;
 	lws_sock_file_fd_type u;
 	struct pollfd pfd;
 	struct lws *wsi1;
 	int n;

 	n = snd_pcm_open(pcm, "default", type, SND_PCM_NONBLOCK);
 	if (n < 0) {
 		lwsl_err("%s: Can't open default for %s: %s\n",
 			 __func__, dir, snd_strerror(n));

 		return -1;
 	}

 	if (snd_pcm_poll_descriptors(*pcm, &pfd, 1) != 1) {
 		lwsl_err("%s: failed to get %s desc\n", __func__, dir);
 		return -1;
 	}

 	u.filefd = (lws_filefd_type)(long long)pfd.fd;
 	wsi1 = lws_adopt_descriptor_vhost(vh, LWS_ADOPT_RAW_FILE_DESC, u,
 					  "lws-audio-test", NULL);
 	if (!wsi1) {
 		/* no alsa error code applies here, so don't report one */
 		lwsl_err("%s: Failed to adopt %s desc\n", __func__, dir);
 		return -1;
 	}
 	if (type == SND_PCM_STREAM_PLAYBACK)
 		lws_rx_flow_control(wsi1, 0); /* no POLLIN */

 	n = snd_pcm_hw_params_malloc(&params);
 	if (n < 0)
 		goto bail; /* params was not allocated, nothing to free */

 	n = snd_pcm_hw_params_any(*pcm, params);
 	if (n < 0)
 		goto bail1;

 	n = snd_pcm_hw_params_set_access(*pcm, params,
 					 SND_PCM_ACCESS_RW_INTERLEAVED);
 	if (n < 0)
 		goto bail1;

 	n = snd_pcm_hw_params_set_format(*pcm, params, SND_PCM_FORMAT_S16_LE);
 	if (n < 0)
 		goto bail1;

 	n = snd_pcm_hw_params_set_channels(*pcm, params, 1);
 	if (n < 0)
 		goto bail1;

 	n = snd_pcm_hw_params_set_rate_near(*pcm, params, &rate, 0);
 	if (n < 0)
 		goto bail1;

 	lwsl_notice("%s: %s rate %u\n", __func__,
 		type == SND_PCM_STREAM_PLAYBACK ? "Playback" : "Capture", rate);

 	n = snd_pcm_hw_params(*pcm, params);
 	snd_pcm_hw_params_free(params);
 	if (n < 0)
 		goto bail;

 	return 0;

 bail1:
 	snd_pcm_hw_params_free(params);
 bail:
 	lwsl_err("%s: Set %s hw params failed: %s\n", __func__, dir,
 		 snd_strerror(n));

 	return -1;
 }

 /*
  * The lws RAW file protocol handler that wraps ALSA.
  *
  * The timing is coming from the ALSA capture channel... since both channels
  * are set to 16kHz, it's enough just to have the one.
  *
  * Each LWS_CALLBACK_RAW_RX_FILE pass does, in order:
  *
  *  1) if playing, move decoded mp3 PCM from mpg123 to the playback channel
  *  2) read new capture samples into the ring buffer
  *  3) if capturing, estimate the audio energy to find the end of the
  *     utterance, and ask the sync stream for a tx
  *  4) if idle, feed whole frames from the ring to the wakeword engine
  *
  * Returns 0, or -1 if protocol init fails.
  */

 /* Figure of Merit under which a capture read is treated as "silence" */
 #define SILENCE_THRESH 125

 static int
 callback_audio(struct lws *wsi, enum lws_callback_reasons reason, void *user,
 	       void *in, size_t len)
 {
 	struct raw_vhd *vhd = (struct raw_vhd *)lws_protocol_vh_priv_get(
 				   lws_get_vhost(wsi), lws_get_protocol(wsi));
 	uint16_t rands[50];
 	int16_t temp[256];
 	long long sumsq;
 	bool det;
 	long avg;
 	int n, s;

 	switch (reason) {
 	case LWS_CALLBACK_PROTOCOL_INIT:

 		if (avhd) /* just on one vhost */
 			return 0;

 		vhd = lws_protocol_vh_priv_zalloc(lws_get_vhost(wsi),
 				lws_get_protocol(wsi), sizeof(struct raw_vhd));
 		if (!vhd) {
 			lwsl_err("%s: OOM allocating vhd\n", __func__);

 			return -1;
 		}
 		avhd = vhd;

 		/*
 		 * Set up the wakeword library
 		 */

 		n = pv_porcupine_init("porcupine_params.pv", "alexa_linux.ppn",
 					1.0, &vhd->porc);
 		if (n) {
 			lwsl_err("%s: porcupine init fail %d\n", __func__, n);

 			return -1;
 		}
 		vhd->porc_spf = pv_porcupine_frame_length();
 		if (vhd->porc_spf <= 0) {
 			lwsl_err("%s: bad porc frame length %d\n", __func__,
 				 vhd->porc_spf);

 			return -1;
 		}
 		vhd->porcbuf = malloc((size_t)vhd->porc_spf *
 				      sizeof(*vhd->porcbuf));
 		if (!vhd->porcbuf) {
 			lwsl_err("%s: OOM allocating porcbuf\n", __func__);

 			return -1;
 		}
 		lwsl_info("%s: %s porc frame length is %d samples\n", __func__,
 				lws_get_vhost_name(lws_get_vhost(wsi)),
 				vhd->porc_spf);

 		vhd->rate = pv_sample_rate(); /* 16kHz */

 		/* set up alsa */

 		if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_playback,
 				  SND_PCM_STREAM_PLAYBACK))  {
 			lwsl_err("%s: Can't open default for playback\n",
 				 __func__);

 			return -1;
 		}

 		if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_capture,
 				  SND_PCM_STREAM_CAPTURE))  {
 			lwsl_err("%s: Can't open default for capture\n",
 				 __func__);

 			return -1;
 		}

 		snd_config_update_free_global();

 		break;

 	case LWS_CALLBACK_PROTOCOL_DESTROY:
 		lwsl_info("%s: LWS_CALLBACK_PROTOCOL_DESTROY\n", __func__);
 		if (!vhd)
 			break;

 		if (vhd->porcbuf) {
 			free(vhd->porcbuf);
 			vhd->porcbuf = NULL;
 		}
 		if (vhd->pcm_playback) {
 			snd_pcm_drop(vhd->pcm_playback);
 			snd_pcm_close(vhd->pcm_playback);
 			vhd->pcm_playback = NULL;
 		}
 		if (vhd->pcm_capture) {
 			snd_pcm_drop(vhd->pcm_capture);
 			snd_pcm_close(vhd->pcm_capture);
 			vhd->pcm_capture = NULL;
 		}
 		if (vhd->porc) {
 			pv_porcupine_delete(vhd->porc);
 			vhd->porc = NULL;
 		}

 		/* the vhost private data is going away, don't leave it dangling */
 		if (avhd == vhd)
 			avhd = NULL;

 		/* avoid most of the valgrind mess from alsa */
 		snd_config_update_free_global();

 		break;

 	case LWS_CALLBACK_RAW_CLOSE_FILE:
 		lwsl_info("%s: closed\n", __func__);
 		break;

 	case LWS_CALLBACK_RAW_RX_FILE:
 		/* we come here about every 250ms */

 		if (!vhd)
 			break;

 		/*
 		 * Playing back the mp3?
 		 */
 		if (vhd->mode == MODE_PLAYING && vhd->mh) {
 			snd_pcm_sframes_t av;
 			size_t amt = 0, room;

 			do {
 				/*
 				 * How many samples to ask the decoder for.  If
 				 * alsa reports an error we still ask for a full
 				 * buffer, so the decoder keeps draining and the
 				 * write below gets to see and handle the error.
 				 */
 				av = snd_pcm_avail(vhd->pcm_playback);
 				if (av < 0 ||
 				    (size_t)av > LWS_ARRAY_SIZE(vhd->p))
 					room = LWS_ARRAY_SIZE(vhd->p);
 				else
 					room = (size_t)av;

 				n = mpg123_read(vhd->mh, (uint8_t *)vhd->p,
 						room * 2, &amt);
 				lwsl_info("%s: PLAYING: mpg123 read %d, n %d\n",
 						__func__, (int)amt, n);
 				if (n == MPG123_NEW_FORMAT) {
 					/* room bytes of silence == room / 2 frames */
 					snd_pcm_start(vhd->pcm_playback);
 					memset(vhd->p, 0, room);
 					snd_pcm_writei(vhd->pcm_playback,
 						       vhd->p, room / 2);
 					snd_pcm_prepare(vhd->pcm_playback);
 				}
 			} while (n == MPG123_NEW_FORMAT);

 			if (amt) {
 				n = snd_pcm_writei(vhd->pcm_playback,
 						   vhd->p, amt / 2);
 				if (n < 0)
 					lwsl_notice("%s: snd_pcm_writei: %d %s\n",
 						    __func__, n, snd_strerror(n));
 				if (n == -EPIPE) {
 					lwsl_err("%s: did EPIPE prep\n", __func__);
 					snd_pcm_prepare(vhd->pcm_playback);
 				}
 			} else
 				/*
 				 * Nothing decoded only means the decoder is
 				 * empty if we actually asked it for something;
 				 * with no room in the alsa buffer we just wait.
 				 */
 				if (room && vhd->destroy_mh_on_drain) {
 					snd_pcm_drain(vhd->pcm_playback);
 					vhd->destroy_mh_on_drain = 0;
 					lwsl_notice("%s: mp3 destroyed\n",
 							__func__);
 					mpg123_close(vhd->mh);
 					mpg123_delete(vhd->mh);
 					vhd->mh = NULL;
 					vhd->mode = MODE_IDLE;

 					if (vhd->done_cb)
 						vhd->done_cb(vhd->opaque);
 				}
 		}

 		/*
 		 * Get the capture data
 		 */

 		n = (int)snd_pcm_readi(vhd->pcm_capture, temp,
 				       LWS_ARRAY_SIZE(temp));
 		if (n <= 0) {
 			/*
 			 * No new samples: everything below assumes n > 0 (it
 			 * is a divisor and a ring advance), so stop here.
 			 */
 			if (n == -EPIPE) {
 				/* overrun: put the channel back as setup left it */
 				lwsl_notice("%s: capture overrun, did prep\n",
 					    __func__);
 				snd_pcm_prepare(vhd->pcm_capture);
 			} else
 				if (n < 0 && n != -EAGAIN)
 					lwsl_notice("%s: snd_pcm_readi: %d %s\n",
 						    __func__, n, snd_strerror(n));
 			break;
 		}

 		for (s = 0; s < n; s++)
 			vhd->p[((size_t)vhd->wpos + (size_t)s) %
 					LWS_ARRAY_SIZE(vhd->p)] = temp[s];

 		if (vhd->mode == MODE_CAPTURING) {

 			/*
 			 * We are recording an utterance.
 			 *
 			 * Estimate the sound density in the frame by picking 50
 			 * samples at random and averaging the sampled
 			 * [abs()^2] / 10000 to create a Figure of Merit.
 			 *
 			 * Speaking on my laptop gets us 1000 - 5000, silence
 			 * is typ under 30.  The wakeword tells us there was
 			 * speech at the start, end the capture when there's
 			 * ~750ms (12000 samples) under 125 FOM.
 			 */

 			/* 50 squares of 16-bit samples don't fit a 32-bit long */
 			sumsq = 0;
 			lws_get_random(lws_get_context(wsi), rands, sizeof(rands));
 			for (s = 0; s < (int)LWS_ARRAY_SIZE(rands); s++) {
 				long long q = temp[rands[s] % n];

 				sumsq += q * q;
 			}
 			avg = (long)((sumsq / (int)LWS_ARRAY_SIZE(rands)) / 10000);

 			lwsl_notice("est audio energy: %ld %d\n", avg, vhd->mode);

 			/*
 			 * Only start looking for "silence" after 1.5s, in case
 			 * he does a long pause after the wakeword.  anycount is
 			 * zeroed at the wakeword and stops counting once the
 			 * 1.5s is reached, so it can't overflow.
 			 */

 			if (vhd->anycount < (3 * vhd->rate) / 2)
 				vhd->anycount += n;
 			else
 				if (avg < SILENCE_THRESH) {
 					vhd->quietcount += n;
 					/* then 750ms of "silence" does it for us */
 					if (vhd->quietcount >=
 					    ((vhd->rate * 3) / 4)) {
 						lwsl_warn("%s: ended capture\n",
 							  __func__);
 						vhd->mode = MODE_IDLE;
 						vhd->quietcount = 0;
 					}
 				}

 			/* if we're not "silent", reset the count */
 			if (avg > SILENCE_THRESH * 2)
 				vhd->quietcount = 0;

 			/*
 			 * Since we are in capturing mode, we have something
 			 * new to send now.
 			 *
 			 * We must send an extra one at the end so we can finish
 			 * the tx.
 			 */
 			lws_ss_request_tx(hss_avs_sync);
 		}

 		/*
 		 * Just waiting for a wakeword
 		 */

 		while (vhd->mode == MODE_IDLE) {
 			int m = 0, ppold = vhd->porcpos;

 			/* samples in the ring the wakeword engine hasn't seen */
 			s = (int)((size_t)(vhd->wpos - vhd->porcpos) %
 					LWS_ARRAY_SIZE(vhd->p));
 			if (s < vhd->porc_spf)
 				break;

 			while (m < vhd->porc_spf) {
 				vhd->porcbuf[m++] = vhd->p[vhd->porcpos];
 				vhd->porcpos = (int)(((size_t)vhd->porcpos + 1) %
 							LWS_ARRAY_SIZE(vhd->p));
 			}

 			det = false;
 			if (pv_porcupine_process(vhd->porc, vhd->porcbuf, &det)) {
 				lwsl_err("%s: porc_process failed\n", __func__);
 				/* carry the old state so no edge is invented */
 				det = !!vhd->last_wake_detect;
 			}

 			/* act on the falling edge, ie, when the wakeword ended */
 			if (!det && vhd->last_wake_detect &&
 			    vhd->mode == MODE_IDLE) {
 				lwsl_warn("************* Wakeword\n");
 				if (!avs_query_start(lws_get_context(wsi))) {
 					vhd->mode = MODE_CAPTURING;
 					vhd->quietcount = 0;
 					vhd->anycount = 0;
 					vhd->last_wake_detect = det;
 					/* upload starts from this frame */
 					vhd->npos = ppold;
 					break;
 				}
 			}
 			vhd->last_wake_detect = det;
 		}

 		/* only now publish the samples read on this pass */
 		vhd->wpos = (int)(((size_t)vhd->wpos + (size_t)n) %
 					LWS_ARRAY_SIZE(vhd->p));
 		break;

 	default:
 		break;
 	}

 	return 0;
 }

 /* protocol definition the adopted ALSA descriptors are bound to by name */
 struct lws_protocols protocol_audio_test = {
 	"lws-audio-test",	/* protocol name */
 	callback_audio,		/* handler */
 	0,
 	0
 };
	/*
	* alsa audio handling
	*
	* Written in 2010-2020 by Andy Green <[email protected]>
	*
	* This file is made available under the Creative Commons CC0 1.0
	* Universal Public Domain Dedication.
	*/

	#include <libwebsockets.h>
	#include <string.h>
	#include <signal.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <fcntl.h>

	#include <alsa/asoundlib.h>
	#include <pv_porcupine.h>

	#include <mpg123.h>

	#include "private.h"

	/*
	 * Secure Streams handles that are defined outside this file.  They are
	 * pointers: hss_avs_sync is passed straight to lws_ss_request_tx().
	 */
	extern struct lws_ss_handle *hss_avs_event;
	extern struct lws_ss_handle *hss_avs_sync;

	/* called here when the wakeword is seen; a 0 return is treated as success */
	int avs_query_start(struct lws_context *context);

	/*
	 * Audio state machine.  MODE_IDLE has to remain 0, since the per-vhost
	 * state holding the mode is zero-allocated and must start out idle.
	 */
	enum {
		MODE_IDLE	= 0,	/* feeding captured audio to the wakeword engine */
		MODE_CAPTURING	= 1,	/* recording the utterance for upload */
		MODE_PLAYING	= 2	/* decoding an mp3 to the playback channel */
	};

	/*
	 * State for the audio protocol, zero-allocated as the vhost's protocol
	 * private data at LWS_CALLBACK_PROTOCOL_INIT.
	 */
	struct raw_vhd {
		/*
		 * Capture ring buffer (about 500ms at 16kHz 16-bit PCM).  The
		 * playback path also borrows it as scratch space for decoded
		 * mp3 samples.
		 */
		int16_t p[8 * 1024];
		pv_porcupine_object_t *porc;	/* wakeword engine instance */
		snd_pcm_t *pcm_capture;		/* ALSA capture channel */
		snd_pcm_t *pcm_playback;	/* ALSA playback channel */
		snd_pcm_hw_params_t *params;	/* not referenced by the code visible here */
		snd_pcm_uframes_t frames;	/* not referenced by the code visible here */
		int16_t *porcbuf;		/* one wakeword frame, porc_spf samples */

		mpg123_handle *mh;		/* decoder handed over via play_mp3() */

		mp3_done_cb done_cb;		/* called once playback is torn down */
		void *opaque;			/* argument passed to done_cb */

		int mode;			/* one of MODE_... */
		int rate;			/* sample rate from pv_sample_rate() */

		int porc_spf;			/* samples per wakeword engine frame */
		int filefd;			/* not referenced by the code visible here */
		int rpos;			/* not referenced by the code visible here */
		int wpos;			/* ring index the next capture read lands at */
		int porcpos;			/* ring index the wakeword engine is up to */
		int npos;			/* ring index spool_capture() is up to */
		int times;			/* not referenced by the code visible here */
		int quietcount;			/* run of low-energy samples while capturing */
		int anycount;			/* sample count tested against the 1.5s grace */

		int wplay;			/* not referenced by the code visible here */
		int rplay;			/* not referenced by the code visible here */

		char last_wake_detect;		/* wakeword result for the previous frame */
		char destroy_mh_on_drain;	/* play_mp3(NULL, ...) asked for teardown */
	};

	/* the one instance, so the helpers called from alexa.c can reach the state */
	static struct raw_vhd *avhd;

	/*
	 * Called from alexa.c to grab the next chunk of the audio capture ring
	 * buffer for upload.
	 *
	 * buf: destination for the 16-bit samples, in native byte order
	 * len: capacity of buf in bytes (an odd trailing byte is never written)
	 *
	 * Returns the number of bytes copied (0 if nothing new is buffered), or
	 * -1 if the audio protocol is not initialized or we are not in capture
	 * mode.
	 */

	int
	spool_capture(uint8_t *buf, size_t len)
	{
		size_t s, os;

		if (!avhd || avhd->mode != MODE_CAPTURING)
			return -1;

		os = s = len / 2;

		while (s && avhd->wpos != avhd->npos) {
			/* buf carries no alignment guarantee, so don't store via int16_t * */
			memcpy(buf, &avhd->p[avhd->npos], sizeof(avhd->p[0]));
			buf += sizeof(avhd->p[0]);
			avhd->npos = (int)(((size_t)avhd->npos + 1) %
						LWS_ARRAY_SIZE(avhd->p));
			s--;
		}

		lwsl_info("Copied %d samples (%d %d)\n", (int)(os - s),
				avhd->wpos, avhd->npos);

		return (int)((os - s) * 2);
	}

	/*
	 * Called from alexa.c to control when the mp3 playback should begin and
	 * end.
	 *
	 * With a non-NULL mh, playback starts from that decoder (cb and opaque
	 * are not used on this path).  With a NULL mh, the current decoder is
	 * marked to be closed and deleted once it has no more output, after
	 * which cb(opaque) is called.
	 *
	 * Returns 0, or -1 if the audio protocol has not been initialized.
	 */

	int
	play_mp3(mpg123_handle *mh, mp3_done_cb cb, void *opaque)
	{
		int n;

		if (!avhd)
			return -1;

		if (mh) {
			avhd->mh = mh;
			avhd->mode = MODE_PLAYING;
			n = snd_pcm_prepare(avhd->pcm_playback);
			if (n < 0)
				/* the decoder is already adopted, so just report it */
				lwsl_warn("%s: snd_pcm_prepare: %s\n", __func__,
					  snd_strerror(n));

			return 0;
		}

		avhd->destroy_mh_on_drain = 1;
		avhd->done_cb = cb;
		avhd->opaque = opaque;

		return 0;
	}

	/*
	 * Helper used to open the default alsa device for one direction, adopt
	 * its poll descriptor into lws on the "lws-audio-test" protocol, and set
	 * the hwparams (interleaved access, S16_LE, mono, nearest rate to the
	 * wakeword engine's sample rate).
	 *
	 * vh:   vhost the descriptor is adopted on
	 * pcm:  receives the opened pcm handle
	 * type: SND_PCM_STREAM_PLAYBACK or SND_PCM_STREAM_CAPTURE
	 *
	 * Returns 0 on success or -1 after logging the failure.  *pcm is left
	 * open on failure once snd_pcm_open() succeeded.
	 */

	static int
	set_hw_params(struct lws_vhost *vh, snd_pcm_t **pcm, int type)
	{
		const char *dir = type == SND_PCM_STREAM_PLAYBACK ?
							"playback" : "capture";
		unsigned int rate = pv_sample_rate(); /* it's 16kHz */
		snd_pcm_hw_params_t *params;
		lws_sock_file_fd_type u;
		struct pollfd pfd;
		struct lws *wsi1;
		int n;

		n = snd_pcm_open(pcm, "default", type, SND_PCM_NONBLOCK);
		if (n < 0) {
			lwsl_err("%s: Can't open default for %s: %s\n",
				 __func__, dir, snd_strerror(n));

			return -1;
		}

		if (snd_pcm_poll_descriptors(*pcm, &pfd, 1) != 1) {
			lwsl_err("%s: failed to get %s desc\n", __func__, dir);
			return -1;
		}

		u.filefd = (lws_filefd_type)(long long)pfd.fd;
		wsi1 = lws_adopt_descriptor_vhost(vh, LWS_ADOPT_RAW_FILE_DESC, u,
						  "lws-audio-test", NULL);
		if (!wsi1) {
			/* no alsa error code applies here, so don't report one */
			lwsl_err("%s: Failed to adopt %s desc\n", __func__, dir);
			return -1;
		}
		if (type == SND_PCM_STREAM_PLAYBACK)
			lws_rx_flow_control(wsi1, 0); /* no POLLIN */

		n = snd_pcm_hw_params_malloc(&params);
		if (n < 0)
			goto bail; /* params was not allocated, nothing to free */

		n = snd_pcm_hw_params_any(*pcm, params);
		if (n < 0)
			goto bail1;

		n = snd_pcm_hw_params_set_access(*pcm, params,
						 SND_PCM_ACCESS_RW_INTERLEAVED);
		if (n < 0)
			goto bail1;

		n = snd_pcm_hw_params_set_format(*pcm, params, SND_PCM_FORMAT_S16_LE);
		if (n < 0)
			goto bail1;

		n = snd_pcm_hw_params_set_channels(*pcm, params, 1);
		if (n < 0)
			goto bail1;

		n = snd_pcm_hw_params_set_rate_near(*pcm, params, &rate, 0);
		if (n < 0)
			goto bail1;

		lwsl_notice("%s: %s rate %u\n", __func__,
			type == SND_PCM_STREAM_PLAYBACK ? "Playback" : "Capture", rate);

		n = snd_pcm_hw_params(*pcm, params);
		snd_pcm_hw_params_free(params);
		if (n < 0)
			goto bail;

		return 0;

	bail1:
		snd_pcm_hw_params_free(params);
	bail:
		lwsl_err("%s: Set %s hw params failed: %s\n", __func__, dir,
			 snd_strerror(n));

		return -1;
	}

	/*
	* The lws RAW file protocol handler that wraps ALSA.
	*
	* The timing is coming from ALSA capture channel... since they are both set to
	* 16kHz, it's enough just to have the one.
	*/

	static int
	callback_audio(struct lws wsi, enum lws_callback_reasons reason, void user,
	void *in, size_t len)
	{
	struct raw_vhd vhd = (struct raw_vhd )lws_protocol_vh_priv_get(
	lws_get_vhost(wsi), lws_get_protocol(wsi));
	uint16_t rands[50];
	int16_t temp[256];
	bool det;
	long avg;
	int n, s;

	switch (reason) {
	case LWS_CALLBACK_PROTOCOL_INIT:

	if (avhd) /* just on one vhost */
	return 0;

	avhd = vhd = lws_protocol_vh_priv_zalloc(lws_get_vhost(wsi),
	lws_get_protocol(wsi), sizeof(struct raw_vhd));

	/*
	* Set up the wakeword library
	*/

	n = pv_porcupine_init("porcupine_params.pv", "alexa_linux.ppn",
	1.0, &vhd->porc);
	if (n) {
	lwsl_err("%s: porcupine init fail %d\n", __func__, n);

	return -1;
	}
	vhd->porc_spf = pv_porcupine_frame_length();
	vhd->porcbuf = malloc(vhd->porc_spf * 2);
	lwsl_info("%s: %s porc frame length is %d samples\n", __func__,
	lws_get_vhost_name(lws_get_vhost(wsi)),
	vhd->porc_spf);

	vhd->rate = pv_sample_rate(); /* 16kHz */

	/* set up alsa */

	if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_playback,
	SND_PCM_STREAM_PLAYBACK)) {
	lwsl_err("%s: Can't open default for playback\n",
	__func__);

	return -1;
	}

	if (set_hw_params(lws_get_vhost(wsi), &vhd->pcm_capture,
	SND_PCM_STREAM_CAPTURE)) {
	lwsl_err("%s: Can't open default for capture\n",
	__func__);

	return -1;
	}

	snd_config_update_free_global();

	break;

	case LWS_CALLBACK_PROTOCOL_DESTROY:
	lwsl_info("%s: LWS_CALLBACK_PROTOCOL_DESTROY\n", __func__);
	if (!vhd)
	break;

	if (vhd->porcbuf) {
	free(vhd->porcbuf);
	vhd->porcbuf = NULL;
	}
	if (vhd->pcm_playback) {
	snd_pcm_drop(vhd->pcm_playback);
	snd_pcm_close(vhd->pcm_playback);
	vhd->pcm_playback = NULL;
	}
	if (vhd->pcm_capture) {
	snd_pcm_drop(vhd->pcm_capture);
	snd_pcm_close(vhd->pcm_capture);
	vhd->pcm_capture = NULL;
	}
	if (vhd->porc) {
	pv_porcupine_delete(vhd->porc);
	vhd->porc = NULL;
	}

	/* avoid most of the valgrind mess from alsa */
	snd_config_update_free_global();

	break;

	case LWS_CALLBACK_RAW_CLOSE_FILE:
	lwsl_info("%s: closed\n", __func__);
	break;

	case LWS_CALLBACK_RAW_RX_FILE:
	/* we come here about every 250ms */

	/*
	* Playing back the mp3?
	*/
	if (vhd->mode == MODE_PLAYING && vhd->mh) {
	size_t amt, try;

	do {
	try = snd_pcm_avail(vhd->pcm_playback);
	if (try > LWS_ARRAY_SIZE(vhd->p))
	try = LWS_ARRAY_SIZE(vhd->p);

	n = mpg123_read(vhd->mh, (uint8_t *)vhd->p,
	try * 2, &amt);
	lwsl_info("%s: PLAYING: mpg123 read %d, n %d\n",
	__func__, (int)amt, n);
	if (n == MPG123_NEW_FORMAT) {
	snd_pcm_start(vhd->pcm_playback);
	memset(vhd->p, 0, try);
	snd_pcm_writei(vhd->pcm_playback,
	vhd->p, try / 2);
	snd_pcm_prepare(vhd->pcm_playback);
	}
	} while (n == MPG123_NEW_FORMAT);

	if (amt) {
	n = snd_pcm_writei(vhd->pcm_playback,
	vhd->p, amt / 2);
	if (n < 0)
	lwsl_notice("%s: snd_pcm_writei: %d %s\n",
	__func__, n, snd_strerror(n));
	if (n == -EPIPE) {
	lwsl_err("%s: did EPIPE prep\n", __func__);
	snd_pcm_prepare(vhd->pcm_playback);
	}
	} else
	if (vhd->destroy_mh_on_drain &&
	n != MPG123_NEW_FORMAT) {
	snd_pcm_drain(vhd->pcm_playback);
	vhd->destroy_mh_on_drain = 0;
	lwsl_notice("%s: mp3 destroyed\n",
	__func__);
	mpg123_close(vhd->mh);
	mpg123_delete(vhd->mh);
	vhd->mh = NULL;
	vhd->mode = MODE_IDLE;

	if (vhd->done_cb)
	vhd->done_cb(vhd->opaque);
	}
	}

	/*
	* Get the capture data
	*/

	n = snd_pcm_readi(vhd->pcm_capture, temp, LWS_ARRAY_SIZE(temp));
	s = 0;
	while (s < n) {
	vhd->p[(vhd->wpos + s) % LWS_ARRAY_SIZE(vhd->p)] = temp[s];
	s++;
	}

	if (vhd->mode == MODE_CAPTURING) {

	/*
	* We are recording an utterance.
	*
	* Estimate the sound density in the frame by picking 50
	* samples at random and averaging the sampled
	* [abs()^2] / 10000 to create a Figure of Merit.
	*
	* Speaking on my laptop gets us 1000 - 5000, silence
	* is typ under 30. The wakeword tells us there was
	* speech at the start, end the capture when there's
	* ~750ms (12000 samples) under 125 FOM.
	*/

	#define SILENCE_THRESH 125

	avg = 0;
	lws_get_random(lws_get_context(wsi), rands, sizeof(rands));
	for (s = 0; s < (int)LWS_ARRAY_SIZE(rands); s++) {
	long q;

	q = temp[rands[s] % n];

	avg += (q * q);
	}
	avg = (avg / (int)LWS_ARRAY_SIZE(rands)) / 10000;

	lwsl_notice("est audio energy: %ld %d\n", avg, vhd->mode);

	/*
	* Only start looking for "silence" after 1.5s, in case
	* he does a long pause after the wakeword
	*/

	if (vhd->anycount < (3 *vhd->rate) / 2 &&
	avg < SILENCE_THRESH) {
	vhd->quietcount += n;
	/* then 500ms of "silence" does it for us */
	if (vhd->quietcount >= ((vhd->rate * 3) / 4)) {
	lwsl_warn("%s: ended capture\n", __func__);
	vhd->mode = MODE_IDLE;
	vhd->quietcount = 0;
	}
	}

	/* if we're not "silent", reset the count */
	if (avg > SILENCE_THRESH * 2)
	vhd->quietcount = 0;

	/*
	* Since we are in capturing mode, we have something
	* new to send now.
	*
	* We must send an extra one at the end so we can finish
	* the tx.
	*/
	lws_ss_request_tx(hss_avs_sync);
	}

	/*
	* Just waiting for a wakeword
	*/

	while (vhd->mode == MODE_IDLE) {
	int m = 0, ppold = vhd->porcpos;

	s = (vhd->wpos - vhd->porcpos) % LWS_ARRAY_SIZE(vhd->p);
	if (s < vhd->porc_spf)
	goto eol;

	while (m < vhd->porc_spf) {
	vhd->porcbuf[m++] = avhd->p[vhd->porcpos];
	vhd->porcpos = (vhd->porcpos + 1) %
	LWS_ARRAY_SIZE(vhd->p);
	}

	if (pv_porcupine_process(vhd->porc, vhd->porcbuf, &det))
	lwsl_err("%s: porc_process failed\n", __func__);

	if (!det && vhd->last_wake_detect &&
	vhd->mode == MODE_IDLE) {
	lwsl_warn("************* Wakeword\n");
	if (!avs_query_start(lws_get_context(wsi))) {
	vhd->mode = MODE_CAPTURING;
	vhd->quietcount = 0;
	vhd->last_wake_detect = det;
	vhd->npos = ppold;
	break;
	}
	}
	vhd->last_wake_detect = det;
	}

	eol:
	vhd->wpos = (vhd->wpos + n) % LWS_ARRAY_SIZE(vhd->p);
	break;

	default:
	break;
	}

	return 0;
	}

	/* protocol definition the adopted ALSA descriptors are bound to by name */
	struct lws_protocols protocol_audio_test = {
		"lws-audio-test",	/* protocol name */
		callback_audio,		/* handler */
		0,
		0
	};