lib/misc/diskcache.c - platform/external/libwebsockets - Git at Google

 /*
  * libwebsockets - small server side websockets and web server implementation
  *
  * Copyright (C) 2010 - 2019 Andy Green <[email protected]>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */

 #if !defined(_GNU_SOURCE)
 #define _GNU_SOURCE
 #endif
 #include <pthread.h>

 #include <libwebsockets.h>
 #include "private-lib-core.h"

 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <dirent.h>
 #include <time.h>
 #include <errno.h>
 #include <stdarg.h>

 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>

 #if defined(__APPLE__)
 #include <sys/dirent.h>
 /* Travis OSX does not have DT_REG... */
 #if !defined(DT_REG)
 #define DT_REG 8
 #endif
 #endif

 struct file_entry {
 	lws_list_ptr sorted;
 	lws_list_ptr prev;
 	char name[64];
 	time_t modified;
 	size_t size;
 };

 struct lws_diskcache_scan {
 	struct file_entry *batch;
 	const char *cache_dir_base;
 	lws_list_ptr head;
 	time_t last_scan_completed;
 	uint64_t agg_size;
 	uint64_t cache_size_limit;
 	uint64_t avg_size;
 	uint64_t cache_tries;
 	uint64_t cache_hits;
 	int cache_subdir;
 	int batch_in_use;
 	int agg_file_count;
 	int secs_waiting;
 };

 #define KIB (1024)
 #define MIB (KIB * KIB)

 #define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

 static const char *hex = "0123456789abcdef";

 #define BATCH_COUNT 128

 static int
 fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
 {
 	struct file_entry *p1 = lp_to_fe(a, sorted), *p2 = lp_to_fe(b, sorted);

 	return (int)((long)p2->modified - (long)p1->modified);
 }

 struct lws_diskcache_scan *
 lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
 {
 	struct lws_diskcache_scan *lds = lws_malloc(sizeof(*lds), "cachescan");

 	if (!lds)
 		return NULL;

 	memset(lds, 0, sizeof(*lds));

 	lds->cache_dir_base = cache_dir_base;
 	lds->cache_size_limit = cache_size_limit;

 	return lds;
 }

 void
 lws_diskcache_destroy(struct lws_diskcache_scan **lds)
 {
 	if ((*lds)->batch)
 		lws_free((*lds)->batch);
 	lws_free(*lds);
 	*lds = NULL;
 }

 int
 lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
 {
 	char dir[256];
 	int n, m;

 	(void)mkdir(cache_base_dir, (unsigned short)mode);
 	if (chown(cache_base_dir, uid, (gid_t)-1))
 		lwsl_err("%s: %s: unable to chown %d\n", __func__,
 			 cache_base_dir, uid);

 	for (n = 0; n < 16; n++) {
 		lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
 		(void)mkdir(dir, (mode_t)mode);
 		if (chown(dir, uid, (uid_t)-1))
 			lwsl_err("%s: %s: unable to chown %d\n", __func__,
 						 dir, uid);
 		for (m = 0; m < 16; m++) {
 			lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
 				     cache_base_dir, hex[n], hex[m]);
 			(void)mkdir(dir, (mode_t)mode);
 			if (chown(dir, uid, (uid_t)-1))
 				lwsl_err("%s: %s: unable to chown %d\n",
 					 __func__, dir, uid);
 		}
 	}

 	return 0;
 }

 /* copies and then truncates the incoming name, and renames the file at the
  * untruncated path to have the new truncated name */

 int
 lws_diskcache_finalize_name(char *cache)
 {
 	char ren[256], *p;

 	strncpy(ren, cache, sizeof(ren) - 1);
 	ren[sizeof(ren) - 1] = '\0';
 	p = strchr(cache, '~');
 	if (p) {
 		*p = '\0';
 		if (rename(ren, cache)) {
 			lwsl_err("%s: problem renaming %s to %s\n", __func__,
 				 ren, cache);
 			return 1;
 		}

 		return 0;
 	}

 	return 1;
 }

 int
 lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
 		    const char *hash_hex, int *_fd, char *cache, int cache_len,
 		    size_t *extant_cache_len)
 {
 	struct stat s;
 	int n;

 	/* caching is disabled? */
 	if (!lds->cache_dir_base)
 		return LWS_DISKCACHE_QUERY_NO_CACHE;

 	if (!is_bot)
 		lds->cache_tries++;

 	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s", lds->cache_dir_base,
 			 hash_hex[0], hash_hex[1], hash_hex);

 	lwsl_info("%s: job cache %s\n", __func__, cache);

 	*_fd = open(cache, O_RDONLY);
 	if (*_fd >= 0) {
 		int fd;

 		if (!is_bot)
 			lds->cache_hits++;

 		if (fstat(*_fd, &s)) {
 			close(*_fd);

 			return LWS_DISKCACHE_QUERY_NO_CACHE;
 		}

 		*extant_cache_len = (size_t)s.st_size;

 		/* "touch" the hit cache file so it's last for LRU now */
 		fd = open(cache, O_RDWR);
 		if (fd >= 0)
 			close(fd);

 		return LWS_DISKCACHE_QUERY_EXISTS;
 	}

 	/* bots are too random to pollute the cache with their antics */
 	if (is_bot)
 		return LWS_DISKCACHE_QUERY_NO_CACHE;

 	/* let's create it first with a unique temp name */

 	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p", (int)getpid(),
 		     extant_cache_len);

 	*_fd = open(cache, O_RDWR | O_CREAT | O_TRUNC, 0600);
 	if (*_fd < 0) {
 		/* well... ok... we will proceed without cache then... */
 		lwsl_notice("%s: Problem creating cache %s: errno %d\n",
 			    __func__, cache, errno);
 		return LWS_DISKCACHE_QUERY_NO_CACHE;
 	}

 	return LWS_DISKCACHE_QUERY_CREATING;
 }

 int
 lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
 {
 	return lds->secs_waiting;
 }

 /*
  * The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
  * the dirs under the cache dir.  Since we don't need or want a full list of
  * files in there in memory at once, we restrict the linked-list size to
  * BATCH_COUNT entries, and once it is full, simply ignore any further files
  * that are newer than the newest one on that list.  Files older than the
  * newest guy already on the list evict the newest guy already on the list
  * and are sorted into the correct order.  In this way no matter the number
  * of files to be processed the memory requirement is fixed at BATCH_COUNT
  * struct file_entry-s.
  *
  * The oldest subset of BATCH_COUNT files are sorted into the cd->batch
  * allocation in more recent -> least recent order.
  *
  * We want to track the total size of all files we saw as well, so we know if
  * we need to actually do anything yet to restrict how much space it's taking
  * up.
  *
  * And we want to do those things statefully and incrementally instead of one
  * big atomic operation, since the user may want a huge cache, so we look in
  * one cache dir at a time and track state in the repodir struct.
  *
  * When we have seen everything, we add the doubly-linked prev pointers and then
  * if we are over the limit, start deleting up to BATCH_COUNT files working back
  * from the end.
  */

 int
 lws_diskcache_trim(struct lws_diskcache_scan *lds)
 {
 	size_t cache_size_limit = (size_t)lds->cache_size_limit;
 	char dirpath[132], filepath[132 + 32];
 	lws_list_ptr lp, op = NULL;
 	int files_trimmed = 0;
 	struct file_entry *p;
 	int fd, n, ret = -1;
 	size_t trimmed = 0;
 	struct dirent *de;
 	struct stat s;
 	DIR *dir;

 	if (!lds->cache_subdir) {

 		if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
 			return 0;

 		lds->batch = lws_malloc(sizeof(struct file_entry) *
 				BATCH_COUNT, "cache_trim");
 		if (!lds->batch) {
 			lwsl_err("%s: OOM\n", __func__);

 			return 1;
 		}
 		lds->agg_size = 0;
 		lds->head = NULL;
 		lds->batch_in_use = 0;
 		lds->agg_file_count = 0;
 	}

 	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
 		     lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
 		     hex[lds->cache_subdir & 15]);

 	dir = opendir(dirpath);
 	if (!dir) {
 		lwsl_err("Unable to walk repo dir '%s'\n",
 			 lds->cache_dir_base);
 		return -1;
 	}

 	do {
 		de = readdir(dir);
 		if (!de)
 			break;

 		if (de->d_type != DT_REG)
 			continue;

 		lds->agg_file_count++;

 		lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
 			     de->d_name);

 		fd = open(filepath, O_RDONLY);
 		if (fd < 0) {
 			lwsl_err("%s: cannot open %s\n", __func__, filepath);

 			continue;
 		}

 		n = fstat(fd, &s);
 		close(fd);
 		if (n) {
 			lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
 			continue;
 		}

 		lds->agg_size += (uint64_t)s.st_size;

 		if (lds->batch_in_use == BATCH_COUNT) {
 			/*
 			 * once we filled up the batch with candidates, we don't
 			 * need to consider any files newer than the newest guy
 			 * on the list...
 			 */
 			if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
 				continue;

 			/*
 			 * ... and if we find an older file later, we know it
 			 * will be replacing the newest guy on the list, so use
 			 * that directly...
 			 */
 			p = lds->head;
 			lds->head = p->sorted;
 		} else
 			/* we are still accepting anything to fill the batch */

 			p = &lds->batch[lds->batch_in_use++];

 		p->sorted = NULL;
 		strncpy(p->name, de->d_name, sizeof(p->name) - 1);
 		p->name[sizeof(p->name) - 1] = '\0';
 		p->modified = s.st_mtime;
 		p->size = (size_t)s.st_size;

 		lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
 	} while (de);

 	ret = 0;

 	lds->cache_subdir++;
 	if (lds->cache_subdir != 0x100)
 		goto done;

 	/* we completed the whole scan... */

 	/* if really no guidence, then 256MiB */
 	if (!cache_size_limit)
 		cache_size_limit = 256 * 1024 * 1024;

 	if (lds->agg_size > cache_size_limit) {

 		/* apply prev pointers to make the list doubly-linked */

 		lp = lds->head;
 		while (lp) {
 			p = lp_to_fe(lp, sorted);

 			p->prev = op;
 			op = &p->prev;
 			lp = p->sorted;
 		}

 		/*
 		 * reverse the list (start from tail, now traverse using
 		 * .prev)... it's oldest-first now...
 		 */

 		lp = op;

 		while (lp && lds->agg_size > cache_size_limit) {
 			p = lp_to_fe(lp, prev);

 			lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
 				     lds->cache_dir_base, p->name[0],
 				     p->name[1], p->name);

 			if (!unlink(filepath)) {
 				lds->agg_size -= p->size;
 				trimmed += p->size;
 				files_trimmed++;
 			} else
 				lwsl_notice("%s: Failed to unlink %s\n",
 					    __func__, filepath);

 			lp = p->prev;
 		}

 		if (files_trimmed)
 			lwsl_notice("%s: %s: trimmed %d files totalling "
 				    "%lldKib, leaving %lldMiB\n", __func__,
 				    lds->cache_dir_base, files_trimmed,
 				    ((unsigned long long)trimmed) / KIB,
 				    ((unsigned long long)lds->agg_size) / MIB);
 	}

 	if (lds->agg_size && lds->agg_file_count)
 		lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

 	/*
 	 * estimate how long we can go before scanning again... default we need
 	 * to start again immediately
 	 */

 	lds->last_scan_completed = time(NULL);
 	lds->secs_waiting = 1;

 	if (lds->agg_size < cache_size_limit) {
 		uint64_t avg = 4096, capacity, projected;

 		/* let's use 80% of the real average for margin */
 		if (lds->agg_size && lds->agg_file_count)
 			avg = ((lds->agg_size * 8) / (uint64_t)lds->agg_file_count) / 10;

 		/*
 		 * if we collected BATCH_COUNT files of the average size,
 		 * how much can we clean up in 256s?
 		 */

 		capacity = avg * BATCH_COUNT;

 		/*
 		 * if the cache grew by 10%, would we hit the limit even then?
 		 */
 		projected = (lds->agg_size * 11) / 10;
 		if (projected < cache_size_limit)
 			/* no... */
 			lds->secs_waiting  = (int)((256 / 2) * ((cache_size_limit -
 						    projected) / capacity));

 		/*
 		 * large waits imply we may not have enough info yet, so
 		 * check once an hour at least.
 		 */

 		if (lds->secs_waiting > 3600)
 			lds->secs_waiting = 3600;
 	} else
 		lds->secs_waiting = 0;

 	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
 		  lds->cache_dir_base,
 		  (unsigned long long)lds->agg_size / KIB,
 		  (unsigned long long)cache_size_limit / KIB,
 		  lds->secs_waiting);

 	lws_free(lds->batch);
 	lds->batch = NULL;

 	lds->cache_subdir = 0;

 done:
 	closedir(dir);

 	return ret;
 }
	/*
	* libwebsockets - small server side websockets and web server implementation
	*
	* Copyright (C) 2010 - 2019 Andy Green <[email protected]>
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in
	* all copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	* IN THE SOFTWARE.
	*/

	#if !defined(_GNU_SOURCE)
	#define _GNU_SOURCE
	#endif
	#include <pthread.h>

	#include <libwebsockets.h>
	#include "private-lib-core.h"

	#include <string.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <dirent.h>
	#include <time.h>
	#include <errno.h>
	#include <stdarg.h>

	#include <sys/stat.h>
	#include <sys/time.h>
	#include <sys/types.h>

	#if defined(__APPLE__)
	#include <sys/dirent.h>
	/* Travis OSX does not have DT_REG... */
	#if !defined(DT_REG)
	#define DT_REG 8
	#endif
	#endif

	struct file_entry {
	lws_list_ptr sorted;
	lws_list_ptr prev;
	char name[64];
	time_t modified;
	size_t size;
	};

	struct lws_diskcache_scan {
	struct file_entry *batch;
	const char *cache_dir_base;
	lws_list_ptr head;
	time_t last_scan_completed;
	uint64_t agg_size;
	uint64_t cache_size_limit;
	uint64_t avg_size;
	uint64_t cache_tries;
	uint64_t cache_hits;
	int cache_subdir;
	int batch_in_use;
	int agg_file_count;
	int secs_waiting;
	};

	#define KIB (1024)
	#define MIB (KIB * KIB)

	#define lp_to_fe(p, _n) lws_list_ptr_container(p, struct file_entry, _n)

	static const char *hex = "0123456789abcdef";

	#define BATCH_COUNT 128

	static int
	fe_modified_sort(lws_list_ptr a, lws_list_ptr b)
	{
	struct file_entry p1 = lp_to_fe(a, sorted), p2 = lp_to_fe(b, sorted);

	return (int)((long)p2->modified - (long)p1->modified);
	}

	struct lws_diskcache_scan *
	lws_diskcache_create(const char *cache_dir_base, uint64_t cache_size_limit)
	{
	struct lws_diskcache_scan lds = lws_malloc(sizeof(lds), "cachescan");

	if (!lds)
	return NULL;

	memset(lds, 0, sizeof(*lds));

	lds->cache_dir_base = cache_dir_base;
	lds->cache_size_limit = cache_size_limit;

	return lds;
	}

	void
	lws_diskcache_destroy(struct lws_diskcache_scan **lds)
	{
	if ((*lds)->batch)
	lws_free((*lds)->batch);
	lws_free(*lds);
	*lds = NULL;
	}

	int
	lws_diskcache_prepare(const char *cache_base_dir, int mode, uid_t uid)
	{
	char dir[256];
	int n, m;

	(void)mkdir(cache_base_dir, (unsigned short)mode);
	if (chown(cache_base_dir, uid, (gid_t)-1))
	lwsl_err("%s: %s: unable to chown %d\n", __func__,
	cache_base_dir, uid);

	for (n = 0; n < 16; n++) {
	lws_snprintf(dir, sizeof(dir), "%s/%c", cache_base_dir, hex[n]);
	(void)mkdir(dir, (mode_t)mode);
	if (chown(dir, uid, (uid_t)-1))
	lwsl_err("%s: %s: unable to chown %d\n", __func__,
	dir, uid);
	for (m = 0; m < 16; m++) {
	lws_snprintf(dir, sizeof(dir), "%s/%c/%c",
	cache_base_dir, hex[n], hex[m]);
	(void)mkdir(dir, (mode_t)mode);
	if (chown(dir, uid, (uid_t)-1))
	lwsl_err("%s: %s: unable to chown %d\n",
	__func__, dir, uid);
	}
	}

	return 0;
	}

	/* copies and then truncates the incoming name, and renames the file at the
	* untruncated path to have the new truncated name */

	int
	lws_diskcache_finalize_name(char *cache)
	{
	char ren[256], *p;

	strncpy(ren, cache, sizeof(ren) - 1);
	ren[sizeof(ren) - 1] = '\0';
	p = strchr(cache, '~');
	if (p) {
	*p = '\0';
	if (rename(ren, cache)) {
	lwsl_err("%s: problem renaming %s to %s\n", __func__,
	ren, cache);
	return 1;
	}

	return 0;
	}

	return 1;
	}

	int
	lws_diskcache_query(struct lws_diskcache_scan *lds, int is_bot,
	const char hash_hex, int _fd, char *cache, int cache_len,
	size_t *extant_cache_len)
	{
	struct stat s;
	int n;

	/* caching is disabled? */
	if (!lds->cache_dir_base)
	return LWS_DISKCACHE_QUERY_NO_CACHE;

	if (!is_bot)
	lds->cache_tries++;

	n = lws_snprintf(cache, (size_t)cache_len, "%s/%c/%c/%s", lds->cache_dir_base,
	hash_hex[0], hash_hex[1], hash_hex);

	lwsl_info("%s: job cache %s\n", __func__, cache);

	*_fd = open(cache, O_RDONLY);
	if (*_fd >= 0) {
	int fd;

	if (!is_bot)
	lds->cache_hits++;

	if (fstat(*_fd, &s)) {
	close(*_fd);

	return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	*extant_cache_len = (size_t)s.st_size;

	/* "touch" the hit cache file so it's last for LRU now */
	fd = open(cache, O_RDWR);
	if (fd >= 0)
	close(fd);

	return LWS_DISKCACHE_QUERY_EXISTS;
	}

	/* bots are too random to pollute the cache with their antics */
	if (is_bot)
	return LWS_DISKCACHE_QUERY_NO_CACHE;

	/* let's create it first with a unique temp name */

	lws_snprintf(cache + n, (size_t)cache_len - (unsigned int)n, "~%d-%p", (int)getpid(),
	extant_cache_len);

	*_fd = open(cache, O_RDWR \| O_CREAT \| O_TRUNC, 0600);
	if (*_fd < 0) {
	/* well... ok... we will proceed without cache then... */
	lwsl_notice("%s: Problem creating cache %s: errno %d\n",
	__func__, cache, errno);
	return LWS_DISKCACHE_QUERY_NO_CACHE;
	}

	return LWS_DISKCACHE_QUERY_CREATING;
	}

	int
	lws_diskcache_secs_to_idle(struct lws_diskcache_scan *lds)
	{
	return lds->secs_waiting;
	}

	/*
	* The goal is to collect the oldest BATCH_COUNT filepaths and filesizes from
	* the dirs under the cache dir. Since we don't need or want a full list of
	* files in there in memory at once, we restrict the linked-list size to
	* BATCH_COUNT entries, and once it is full, simply ignore any further files
	* that are newer than the newest one on that list. Files older than the
	* newest guy already on the list evict the newest guy already on the list
	* and are sorted into the correct order. In this way no matter the number
	* of files to be processed the memory requirement is fixed at BATCH_COUNT
	* struct file_entry-s.
	*
	* The oldest subset of BATCH_COUNT files are sorted into the cd->batch
	* allocation in more recent -> least recent order.
	*
	* We want to track the total size of all files we saw as well, so we know if
	* we need to actually do anything yet to restrict how much space it's taking
	* up.
	*
	* And we want to do those things statefully and incrementally instead of one
	* big atomic operation, since the user may want a huge cache, so we look in
	* one cache dir at a time and track state in the repodir struct.
	*
	* When we have seen everything, we add the doubly-linked prev pointers and then
	* if we are over the limit, start deleting up to BATCH_COUNT files working back
	* from the end.
	*/

	int
	lws_diskcache_trim(struct lws_diskcache_scan *lds)
	{
	size_t cache_size_limit = (size_t)lds->cache_size_limit;
	char dirpath[132], filepath[132 + 32];
	lws_list_ptr lp, op = NULL;
	int files_trimmed = 0;
	struct file_entry *p;
	int fd, n, ret = -1;
	size_t trimmed = 0;
	struct dirent *de;
	struct stat s;
	DIR *dir;

	if (!lds->cache_subdir) {

	if (lds->last_scan_completed + lds->secs_waiting > time(NULL))
	return 0;

	lds->batch = lws_malloc(sizeof(struct file_entry) *
	BATCH_COUNT, "cache_trim");
	if (!lds->batch) {
	lwsl_err("%s: OOM\n", __func__);

	return 1;
	}
	lds->agg_size = 0;
	lds->head = NULL;
	lds->batch_in_use = 0;
	lds->agg_file_count = 0;
	}

	lws_snprintf(dirpath, sizeof(dirpath), "%s/%c/%c",
	lds->cache_dir_base, hex[(lds->cache_subdir >> 4) & 15],
	hex[lds->cache_subdir & 15]);

	dir = opendir(dirpath);
	if (!dir) {
	lwsl_err("Unable to walk repo dir '%s'\n",
	lds->cache_dir_base);
	return -1;
	}

	do {
	de = readdir(dir);
	if (!de)
	break;

	if (de->d_type != DT_REG)
	continue;

	lds->agg_file_count++;

	lws_snprintf(filepath, sizeof(filepath), "%s/%s", dirpath,
	de->d_name);

	fd = open(filepath, O_RDONLY);
	if (fd < 0) {
	lwsl_err("%s: cannot open %s\n", __func__, filepath);

	continue;
	}

	n = fstat(fd, &s);
	close(fd);
	if (n) {
	lwsl_notice("%s: cannot stat %s\n", __func__, filepath);
	continue;
	}

	lds->agg_size += (uint64_t)s.st_size;

	if (lds->batch_in_use == BATCH_COUNT) {
	/*
	* once we filled up the batch with candidates, we don't
	* need to consider any files newer than the newest guy
	* on the list...
	*/
	if (lp_to_fe(lds->head, sorted)->modified < s.st_mtime)
	continue;

	/*
	* ... and if we find an older file later, we know it
	* will be replacing the newest guy on the list, so use
	* that directly...
	*/
	p = lds->head;
	lds->head = p->sorted;
	} else
	/* we are still accepting anything to fill the batch */

	p = &lds->batch[lds->batch_in_use++];

	p->sorted = NULL;
	strncpy(p->name, de->d_name, sizeof(p->name) - 1);
	p->name[sizeof(p->name) - 1] = '\0';
	p->modified = s.st_mtime;
	p->size = (size_t)s.st_size;

	lws_list_ptr_insert(&lds->head, &p->sorted, fe_modified_sort);
	} while (de);

	ret = 0;

	lds->cache_subdir++;
	if (lds->cache_subdir != 0x100)
	goto done;

	/* we completed the whole scan... */

	/* if really no guidence, then 256MiB */
	if (!cache_size_limit)
	cache_size_limit = 256 * 1024 * 1024;

	if (lds->agg_size > cache_size_limit) {

	/* apply prev pointers to make the list doubly-linked */

	lp = lds->head;
	while (lp) {
	p = lp_to_fe(lp, sorted);

	p->prev = op;
	op = &p->prev;
	lp = p->sorted;
	}

	/*
	* reverse the list (start from tail, now traverse using
	* .prev)... it's oldest-first now...
	*/

	lp = op;

	while (lp && lds->agg_size > cache_size_limit) {
	p = lp_to_fe(lp, prev);

	lws_snprintf(filepath, sizeof(filepath), "%s/%c/%c/%s",
	lds->cache_dir_base, p->name[0],
	p->name[1], p->name);

	if (!unlink(filepath)) {
	lds->agg_size -= p->size;
	trimmed += p->size;
	files_trimmed++;
	} else
	lwsl_notice("%s: Failed to unlink %s\n",
	__func__, filepath);

	lp = p->prev;
	}

	if (files_trimmed)
	lwsl_notice("%s: %s: trimmed %d files totalling "
	"%lldKib, leaving %lldMiB\n", __func__,
	lds->cache_dir_base, files_trimmed,
	((unsigned long long)trimmed) / KIB,
	((unsigned long long)lds->agg_size) / MIB);
	}

	if (lds->agg_size && lds->agg_file_count)
	lds->avg_size = lds->agg_size / (uint64_t)lds->agg_file_count;

	/*
	* estimate how long we can go before scanning again... default we need
	* to start again immediately
	*/

	lds->last_scan_completed = time(NULL);
	lds->secs_waiting = 1;

	if (lds->agg_size < cache_size_limit) {
	uint64_t avg = 4096, capacity, projected;

	/* let's use 80% of the real average for margin */
	if (lds->agg_size && lds->agg_file_count)
	avg = ((lds->agg_size * 8) / (uint64_t)lds->agg_file_count) / 10;

	/*
	* if we collected BATCH_COUNT files of the average size,
	* how much can we clean up in 256s?
	*/

	capacity = avg * BATCH_COUNT;

	/*
	* if the cache grew by 10%, would we hit the limit even then?
	*/
	projected = (lds->agg_size * 11) / 10;
	if (projected < cache_size_limit)
	/* no... */
	lds->secs_waiting = (int)((256 / 2) * ((cache_size_limit -
	projected) / capacity));

	/*
	* large waits imply we may not have enough info yet, so
	* check once an hour at least.
	*/

	if (lds->secs_waiting > 3600)
	lds->secs_waiting = 3600;
	} else
	lds->secs_waiting = 0;

	lwsl_info("%s: cache %s: %lldKiB / %lldKiB, next scan %ds\n", __func__,
	lds->cache_dir_base,
	(unsigned long long)lds->agg_size / KIB,
	(unsigned long long)cache_size_limit / KIB,
	lds->secs_waiting);

	lws_free(lds->batch);
	lds->batch = NULL;

	lds->cache_subdir = 0;

	done:
	closedir(dir);

	return ret;
	}