
/* Copyright (c) 2008-2009 Nall Design Works
   Copyright 2006 Trusted Computer Solutions, Inc. */

/*
 Exported Interface

 int init_translations(void);
 void finish_context_translations(void);
 int trans_context(const char *, char **);
 int untrans_context(const char *, char **);

*/

#include <math.h>
#include <glob.h>
#include <values.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <ctype.h>
#include <selinux/selinux.h>
#include <selinux/context.h>
#include <syslog.h>
#include <errno.h>
#include <pcre.h>
#include <ctype.h>
#include <time.h>
#include <sys/time.h>


#include "mls_level.h"
#include "mcstrans.h"

#define N_BUCKETS 1453
#define OVECCOUNT (512*3)

#define log_error(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)

#ifdef DEBUG
#define log_debug(fmt, ...) fprintf(stderr, fmt, __VA_ARGS__)
#else
#define log_debug(fmt, ...) do {} while (0)
#endif

static unsigned int maxbit=0;

/* Define data structures */
typedef struct context_map {
	char *raw;
	char *trans;
} context_map_t;

typedef struct context_map_node {
	char *key;
	context_map_t *map;
	struct context_map_node *next;
} context_map_node_t;

typedef struct affix {
	char *text;
	struct affix *next;
} affix_t;

typedef struct word {
	char *text;
	ebitmap_t cat;
	ebitmap_t normal;
	ebitmap_t inverse;
	struct word *next;
} word_t;

typedef struct word_group {
	char *name;
	char *whitespace;
	char *join;

	affix_t *prefixes;
	affix_t *suffixes;
	word_t *words;

	pcre *prefix_regexp;
	pcre *word_regexp;
	pcre *suffix_regexp;

	ebitmap_t def;

	word_t **sword;
	unsigned int sword_len;

	struct word_group *next;
} word_group_t;

typedef struct base_classification {
	char *trans;
	mls_level_t *level;
	struct base_classification *next;
} base_classification_t;

typedef struct domain {
	char *name;

	context_map_node_t *raw_to_trans[N_BUCKETS];
	context_map_node_t *trans_to_raw[N_BUCKETS];

	base_classification_t *base_classifications;
	word_group_t *groups;

	pcre *base_classification_regexp;
	struct domain *next;
} domain_t;

static domain_t *domains;

typedef struct sens_constraint {
	char op;
	char *text;
	unsigned int sens;
	ebitmap_t cat;
	struct sens_constraint *next;
} sens_constraint_t;

static sens_constraint_t *sens_constraints;

typedef struct cat_constraint {
	char op;
	char *text;
	int nbits;
	ebitmap_t mask;
	ebitmap_t cat;
	struct cat_constraint *next;
} cat_constraint_t;

static cat_constraint_t *cat_constraints;

unsigned int
hash(const char *str) {
	unsigned int hash = 5381;
	int c;

	while ((c = *(unsigned const char *)str++))
		hash = ((hash << 5) + hash) + c;

	return hash;
}

static int
add_to_hashtable(context_map_node_t **table, char *key, context_map_t *map) {
	unsigned int bucket = hash(key) % N_BUCKETS;
	context_map_node_t **n;
	for (n = &table[bucket]; *n; n = &(*n)->next)
		;
	*n = malloc(sizeof(context_map_node_t));
	if (! *n)
		goto err;
	(*n)->key = key;
	(*n)->map = map;
	(*n)->next = NULL;
	return 0;
err:
	syslog(LOG_ERR, "add_to_hashtable: allocation error");
	return -1;
}

static int
numdigits(unsigned int n)
{
	int count = 1;
	while ((n = n / 10))
		count++;
	return count;
}

static int
parse_category(ebitmap_t *e, const char *raw, int allowinverse)
{
	int inverse = 0;
	unsigned int low, high;
	while (*raw) {
		if (allowinverse && *raw == '~') {
			inverse = !inverse;
			raw++;
			continue;
		}
		if (sscanf(raw,"c%u", &low) != 1)
			return -1;
		raw += numdigits(low) + 1;
		if (*raw == '.') {
			raw++;
			if (sscanf(raw,"c%u", &high) != 1)
				return -1;
			raw += numdigits(high) + 1;
		} else {
			high = low;
		}
		while (low <= high) {
			if (low >= maxbit)
				maxbit = low + 1;
			if (ebitmap_set_bit(e, low, inverse ? 0 : 1) < 0)
				return -1;
			low++;
		}
		if (*raw == ',') {
			raw++;
			inverse = 0;
		} else if (*raw != '\0') {
			return -1;
		}
	}
	return 0;
}

int
parse_ebitmap(ebitmap_t *e, ebitmap_t *def, const char *raw) {
	int rc = ebitmap_cpy(e, def);
	if (rc < 0)
		return rc;
	rc = parse_category(e, raw, 1);
	if (rc < 0)
		return rc;
	return 0;
}

mls_level_t *
parse_raw(const char *raw) {
	mls_level_t *mls = calloc(1, sizeof(mls_level_t));
	if (!mls)
		goto err;
	if (sscanf(raw,"s%u", &mls->sens) != 1)
		goto err;
	raw += numdigits(mls->sens) + 1;
	if (*raw == ':') {
		raw++;
		if (parse_category(&mls->cat, raw, 0) < 0)
			goto err;
	} else if (*raw != '\0') {
		goto err;
	}

	return mls;

err:
	ebitmap_destroy(&mls->cat);
	free(mls);
	return NULL;
}

void
destroy_word(word_t **list, word_t *word) {
	if (!word) {
		return;
	}
	for (; list && *list; list = &(*list)->next) {
		if (*list == word) {
			*list = word->next;
			break;
		}
	}
	free(word->text);
	ebitmap_destroy(&word->cat);
	ebitmap_destroy(&word->normal);
	ebitmap_destroy(&word->inverse);
	memset(word, 0, sizeof(word_t));
	free(word);
}

word_t *
create_word(word_t **list, const char *text) {
	word_t *w = calloc(1, sizeof(word_t));
	if (!w) {
		goto err;
	}
	w->text = strdup(text);
	if (!w->text) {
		goto err;
	}
	if (list) {
		for (; *list; list = &(*list)->next)
			;
		*list = w;
	}

	return w;

err:
	log_error("create_word: allocation error %s", strerror(errno));
	destroy_word(NULL, w);
	return NULL;
}

void
destroy_group(word_group_t **list, word_group_t *group) {
	for (; list && *list; list = &(*list)->next) {
		if (*list == group) {
			*list = group->next;
			break;
		}
	}
	while(group->prefixes) {
		affix_t *next = group->prefixes->next;
		free(group->prefixes->text);
		free(group->prefixes);
		group->prefixes=next;
	}
	while(group->suffixes) {
		affix_t *next = group->suffixes->next;
		free(group->suffixes->text);
		free(group->suffixes);
		group->suffixes=next;
	}
	while(group->words)
		destroy_word(&group->words, group->words);
	free(group->whitespace);
	free(group->name);
	free(group->sword);
	free(group->join);
	pcre_free(group->prefix_regexp);
	pcre_free(group->word_regexp);
	pcre_free(group->suffix_regexp);
	ebitmap_destroy(&group->def);
	free(group);
}

word_group_t *
create_group(word_group_t **list, const char *name) {
	word_group_t *group = calloc(1, sizeof(word_group_t));
	if (!group)
		return NULL;
	group->name = strdup(name);
	if (!group->name) {
		goto err;
	}
	group->join = strdup(" ");
	if (!group->join) {
		goto err;
	}
	group->whitespace = strdup(" ");
	if (!group->whitespace) {
		goto err;
	}
	group->sword = NULL;

	if (list) {
		for (; *list; list = &(*list)->next)
			;
		*list = group;
	}

	return group;

err:
	log_error("allocation error %s", strerror(errno));
	destroy_group(NULL, group);
	return NULL;
}

void
destroy_domain(domain_t *domain) {
	int i;
        unsigned int rt = 0, tr = 0;
	for (i=0; i < N_BUCKETS; i++) {
		context_map_node_t *ptr;
		for (ptr = domain->trans_to_raw[i]; ptr;)  {
			context_map_node_t *t = ptr->next;
			free(ptr);
			ptr = t;
			tr++;
		}
		domain->trans_to_raw[i] = NULL;
	}
	for (i=0; i < N_BUCKETS; i++) {
		context_map_node_t *ptr;
		for (ptr = domain->raw_to_trans[i]; ptr;)  {
			context_map_node_t *t = ptr->next;
			free(ptr->map->raw);
			free(ptr->map->trans);
			free(ptr->map);
			free(ptr);
			ptr = t;
			rt++;
		}
		domain->raw_to_trans[i] = NULL;
	}
	while (domain->base_classifications)  {
		base_classification_t *next = domain->base_classifications->next;
		free(domain->base_classifications->trans);
		ebitmap_destroy(&domain->base_classifications->level->cat);
		free(domain->base_classifications->level);
		free(domain->base_classifications);
		domain->base_classifications = next;
	}
	pcre_free(domain->base_classification_regexp);
	while (domain->groups)
		destroy_group(&domain->groups, domain->groups);
	free(domain->name);
	free(domain);

	syslog(LOG_INFO, "cache sizes: tr = %u, rt = %u", tr, rt);
}

domain_t *
create_domain(const char *name) {
	domain_t *domain = calloc(1, sizeof(domain_t));
	if (!domain) {
		goto err;
	}
	domain->name = strdup(name);
	if (!domain->name) {
		goto err;
	}

	domain_t **d = &domains;
	for (; *d; d = &(*d)->next)
		;
	*d = domain;

	return domain;

err:
	log_error("allocation error %s", strerror(errno));
	destroy_domain(domain);
	return NULL;
}

int
add_word(word_group_t *group, char *raw, char *trans) {
	if (strchr(trans,'-')) {
		log_error("'%s'is invalid because '-' is illegal in modifiers.\n", trans);
		return -1;
	}
	word_t *word = create_word(&group->words, trans);
	int rc = parse_ebitmap(&word->cat, &group->def, raw);
	if (rc < 0) {
		log_error(" syntax error in %s\n", raw);
		destroy_word(&group->words, word);
		return -1;
	}
	if (ebitmap_andnot(&word->normal, &word->cat, &group->def, maxbit) < 0)
		return -1;

	ebitmap_t temp;
	if (ebitmap_xor(&temp, &word->cat, &group->def) < 0)
		return -1;
	if (ebitmap_and(&word->inverse, &temp, &group->def) < 0)
		return -1;
	ebitmap_destroy(&temp);

	return 0;
}

int
add_constraint(char op, char *raw, char *tok) {
	log_debug("%s\n", "add_constraint");
	ebitmap_t empty;
	ebitmap_init(&empty);
	if (!raw || !*raw) {
		syslog(LOG_ERR, "unable to parse line");
		return -1;
	}
	if (*raw == 's') {
		sens_constraint_t *constraint = calloc(1, sizeof(sens_constraint_t));
		if (!constraint) {
			log_error("allocation error %s", strerror(errno));
			return -1;
		}
		if (sscanf(raw,"s%u", &constraint->sens) != 1) {
			syslog(LOG_ERR, "unable to parse level");
			free(constraint);
			return -1;
		}
		if (parse_ebitmap(&constraint->cat, &empty, tok) < 0) {
			syslog(LOG_ERR, "unable to parse cat");
			free(constraint);
			return -1;
		}
		if (asprintf(&constraint->text, "%s%c%s", raw, op, tok) < 0) {
			log_error("asprintf failed %s", strerror(errno));
			return -1;
		}
		constraint->op = op;
		sens_constraint_t **p;
		for (p= &sens_constraints; *p; p = &(*p)->next)
                        ;
                *p = constraint;
		return 0;
	} else if (*raw == 'c' ) {
		cat_constraint_t *constraint = calloc(1, sizeof(cat_constraint_t));
		if (!constraint) {
			log_error("allocation error %s", strerror(errno));
			return -1;
		}
		if (parse_ebitmap(&constraint->mask, &empty, raw) < 0) {
			syslog(LOG_ERR, "unable to parse mask");
			free(constraint);
			return -1;
		}
		if (parse_ebitmap(&constraint->cat, &empty, tok) < 0) {
			syslog(LOG_ERR, "unable to parse cat");
			ebitmap_destroy(&constraint->mask);
			free(constraint);
			return -1;
		}
		if (asprintf(&constraint->text, "%s%c%s", raw, op, tok) < 0) {
			log_error("asprintf failed %s", strerror(errno));
			return -1;
		}
		constraint->nbits = ebitmap_cardinality(&constraint->cat);
		constraint->op = op;
		cat_constraint_t **p;
		for (p= &cat_constraints; *p; p = &(*p)->next)
                        ;
                *p = constraint;
		return 0;
	} else {
		return -1;
	}
	
	return 0;
}

int
violates_constraints(mls_level_t *l) {
	int nbits;
	sens_constraint_t *s;
	ebitmap_t common;
	for (s=sens_constraints; s; s=s->next) {
		if (s->sens == l->sens) {
			if (ebitmap_and(&common, &s->cat, &l->cat) < 0)
				return 1;
			nbits = ebitmap_cardinality(&common);
			ebitmap_destroy(&common);
			if (nbits) {
				char *text = mls_level_to_string(l);
				syslog(LOG_WARNING, "%s violates %s", text, s->text);
				free(text);
				return 1;
			}
		}
	}
	cat_constraint_t *c;
	for (c=cat_constraints; c; c=c->next) {
		if (ebitmap_and(&common, &c->mask, &l->cat) < 0)
			return 1;
		nbits = ebitmap_cardinality(&common);
		ebitmap_destroy(&common);
		if (nbits > 0) {
			if (ebitmap_and(&common, &c->cat, &l->cat) < 0)
				return 1;
			nbits = ebitmap_cardinality(&common);
			ebitmap_destroy(&common);
			if ((c->op == '!' && nbits) ||
			    (c->op == '>' && nbits != c->nbits)) {
				char *text = mls_level_to_string(l);
				syslog(LOG_WARNING, "%s violates %s (%d,%d)", text, c->text, nbits, c->nbits);
				free(text);
				return 1;
			}
		}
	}
	return 0;
}

void
destroy_sens_constraint(sens_constraint_t **list, sens_constraint_t *constraint) {
	if (!constraint) {
		return;
	}
	for (; list && *list; list = &(*list)->next) {
		if (*list == constraint) {
			*list = constraint->next;
			break;
		}
	}
	ebitmap_destroy(&constraint->cat);
	free(constraint->text);
	memset(constraint, 0, sizeof(sens_constraint_t));
	free(constraint);
}

void
destroy_cat_constraint(cat_constraint_t **list, cat_constraint_t *constraint) {
	if (!constraint) {
		return;
	}
	for (; list && *list; list = &(*list)->next) {
		if (*list == constraint) {
			*list = constraint->next;
			break;
		}
	}
	ebitmap_destroy(&constraint->mask);
	ebitmap_destroy(&constraint->cat);
	free(constraint->text);
	memset(constraint, 0, sizeof(cat_constraint_t));
	free(constraint);
}


static int
add_base_classification(domain_t *domain, char *raw, char *trans) {
	mls_level_t *level = parse_raw(raw);
	if (level) {
		base_classification_t **i;
		base_classification_t *base_classification = calloc(1, sizeof(base_classification_t));
		if (!base_classification) {
			log_error("allocation error %s", strerror(errno));
			return -1;
		}
		base_classification->trans=strdup(trans);
		if (!base_classification->trans) {
			log_error("allocation error %s", strerror(errno));
			free(base_classification);
			return -1;
		}
		base_classification->level=level;

		for (i=&domain->base_classifications; *i; i=&(*i)->next)
		;
		*i = base_classification;
			return 0;
		}
	log_error(" add_base_classification error %s %s\n", raw, trans);
	return -1;
}

static int
add_cache(domain_t *domain, char *raw, char *trans) {
	context_map_t *map = calloc(1, sizeof(context_map_t));
	if (!map) goto err;

	map->raw = strdup(raw);
	if (!map->raw) {
		goto err;
	}
	map->trans = strdup(trans);
	if (!map->trans) {
		goto err;
	}

	log_debug(" add_cache (%s,%s)\n", raw, trans);
	if (add_to_hashtable(domain->raw_to_trans, map->raw, map) < 0)
		goto err;

	if (add_to_hashtable(domain->trans_to_raw, map->trans, map) < 0)
		goto err;

	return 0;
err:
	log_error("%s: allocation error", "add_cache");
	return -1;
}

static context_map_t *
find_in_table(context_map_node_t **table, const char *key) {
	unsigned int bucket = hash(key) % N_BUCKETS;
	context_map_node_t **n;
	for (n = &table[bucket]; *n; n = &(*n)->next)
		if (!strcmp((*n)->key, key))
			return (*n)->map;
	return NULL;
}

char *
trim(char *str, const char *whitespace) {
	char *p = str + strlen(str);

	while (p > str && strchr(whitespace, *(p-1)) != NULL)
		*--p = 0;
	return str;
}

char *
triml(char *str, const char *whitespace) {
	char *p = str;

	while (*p && (strchr(whitespace, *p) != NULL))
		p++;
	return p;
}

int
update(char **p, char *const val) {
	free (*p);
	*p = strdup(val);
	if (!*p) {
		log_error("allocation error %s", strerror(errno));
		return -1;
	}
	return 0;
}

int
append(affix_t **affixes, const char *val) {
	affix_t *affix = calloc(1, sizeof(affix_t));
	if (!affix) {
		goto err;
	}
	affix->text = strdup(val);
	if (!affix->text)
		goto err;
	for (;*affixes; affixes = &(*affixes)->next)
		;
	*affixes = affix;
	return 0;

err:
	log_error("allocation error %s", strerror(errno));
	free(affix);
	return -1;
}

static int read_translations(const char *filename);

/* Process line from translation file.
   Remove white space and set raw do data before the "=" and tok to data after it
   Modifies the data pointed to by the buffer parameter
 */
static int
process_trans(char *buffer) {
	static domain_t *domain;
	static word_group_t *group;
	static int base_classification;
	static int lineno = 0;
	char op='\0';

	lineno++;
	log_debug("%d: %s", lineno, buffer);

	/* zap leading whitespace */
	buffer = triml(buffer, "\t ");

	/* Ignore comments */
	if (*buffer == '#') return 0;
	char *comment = strpbrk (buffer, "#");
	if (comment) {
		*comment = '\0';
	}

	/* zap trailing whitespace */
	buffer = trim(buffer, "\t \r\n");

	if (*buffer == 0) return 0;

	char *delim = strpbrk (buffer, "=!>");
	if (! delim) {
		syslog(LOG_ERR, "invalid line (no !, = or >) %d", lineno);
		return -1;
	}

	op = *delim;
	*delim = '\0';
	char *raw = buffer;
	char *tok = delim+1;

	/* remove trailing/leading whitespace from the split tokens */
	trim(raw, "\t ");
	tok = triml(tok, "\t ");

	if (! *raw) {
		syslog(LOG_ERR, "invalid line %d", lineno);
		return -1;
	}

	if (! *tok) {
		syslog(LOG_ERR, "invalid line %d", lineno);
		return -1;
	}

	/* constraints have different syntax */
	if (op == '!' || op == '>') {
		return add_constraint(op, raw, tok);
	}

	if (!strcmp(raw, "Domain")) {
		domain = create_domain(tok);
		group = NULL;
		return 0;
	}

	if (!domain) {
		domain = create_domain("Default");
		if (!domain)
			return -1;
		group = NULL;
	}

	if (!group &&
	    (!strcmp(raw, "Whitespace") || !strcmp(raw, "Join") ||
	     !strcmp(raw, "Prefix") || !strcmp(raw, "Suffix") ||
		 !strcmp(raw, "Default"))) {
		syslog(LOG_ERR, "expected  ModifierGroup declaration on line %d", lineno);
		return -1;
	}

	if (!strcmp(raw, "Include")) {
		unsigned int n;
		glob_t g;
		g.gl_offs = 0;
		if (glob(tok, GLOB_ERR, NULL, &g) < 0) {
			globfree(&g);
			return -1;
		}
		for (n=0; n < g.gl_pathc; n++) {
			if (read_translations(g.gl_pathv[n]) < 0) {
				globfree(&g);
				return -1;
			}
		}
		globfree(&g);
	} else if (!strcmp(raw, "Base")) {
		base_classification = 1;
	} else if (!strcmp(raw, "ModifierGroup")) {
		group = create_group(&domain->groups, tok);
		if (!group)
			return -1;
		base_classification = 0;
	} else if (!strcmp(raw, "Whitespace")) {
		if (update (&group->whitespace, tok) < 0)
			return -1;
	} else if (!strcmp(raw, "Join")) {
		if (update (&group->join, tok) < 0)
			return -1;
	} else if (!strcmp(raw, "Prefix")) {
		if (append (&group->prefixes, tok) < 0)
			return -1;
	} else if (!strcmp(raw, "Suffix")) {
		if (append (&group->suffixes, tok) < 0)
			return -1;
	} else if (!strcmp(raw, "Default")) {
		ebitmap_t empty;
		ebitmap_init(&empty);
		if (parse_ebitmap(&group->def, &empty, tok) < 0) {
			syslog(LOG_ERR, "unable to parse Default %d", lineno);
			return -1;
		}
	} else if (group) {
		if (add_word(group, raw, tok) < 0) {
			syslog(LOG_ERR, "unable to add base_classification on line %d", lineno);
			return -1;
		}
	} else {
		if (base_classification) {
			if (add_base_classification(domain, raw, tok) < 0) {
				syslog(LOG_ERR, "unable to add base_classification on line %d", lineno);
				return -1;
			}
		}
		if (add_cache(domain, raw, tok) < 0)
			return -1;
	}
	return 0;
}

int
read_translations(const char *filename) {
	size_t size = 0;
	char *buffer = NULL;
	int rval = 0;

	FILE *cfg = fopen(filename,"r");
	if (!cfg) {
		syslog(LOG_ERR, "%s file open failed", filename);
		return -1;
	}

	__fsetlocking(cfg, FSETLOCKING_BYCALLER);
	while (getline(&buffer, &size, cfg) > 0) {
		if( process_trans(buffer) < 0 ) {
			syslog(LOG_ERR, "%s file read failed", filename);
			rval = -1;
			break;
		}
	}
	free(buffer);
	fclose(cfg);
	return rval;
}

int
init_translations(void) {
	if (is_selinux_mls_enabled() <= 0)
		return -1;

	return(read_translations(selinux_translations_path()));
}

char *
extract_range(const char *incon) {
	context_t con = context_new(incon);
	if (!con) {
		syslog(LOG_ERR, "extract_range context_new(%s) failed: %s", incon, strerror(errno));
		return NULL;
	}

	const char *range = context_range_get(con);
	if (!range) {
		syslog(LOG_ERR, "extract_range: context_range_get(%s) failed: %m", incon);
		context_free(con);
		return NULL;
	}
	char *r = strdup(range);
	if (!r) {
		log_error("extract_range: allocation error %s", strerror(errno));
		return NULL;
	}
	context_free(con);
	return r;
}

char *
new_context_str(const char *incon, const char *range) {
	char *rcon = NULL;
	context_t con = context_new(incon);
	if (!con) {
		goto exit;
	}
	context_range_set(con, range);
	rcon = strdup(context_str(con));
	context_free(con);
	if (!rcon) {
		goto exit;
	}

	return rcon;

exit:
	log_error("new_context_str: %s %s", incon, strerror(errno));
	return NULL;
}

char *
find_in_hashtable(const char *range, domain_t *domain, context_map_node_t **table) {
	char *trans = NULL;
	context_map_t *map = find_in_table(table, range);
	if (map) {
		trans = strdup((table == domain->raw_to_trans) ? map->trans: map->raw);
		if (!trans) {
			log_error("find_in_hashtable: allocation error %s", strerror(errno));
			return NULL;
		}
		log_debug(" found %s in hashtable returning %s\n", range, trans);
	}
	return trans;
}

void
emit_whitespace(char*buffer, char *whitespace) {
	strcat(buffer, "[");
	strcat(buffer, whitespace);
	strcat(buffer, "]");
}

static int
string_size(const void *p1, const void *p2) {
	return strlen(*(char **)p2) - strlen(*(char **)p1);
}

static int
word_size(const void *p1, const void *p2) {
	word_t *w1 = *(word_t **)p1;
	word_t *w2 = *(word_t **)p2;
	int w1_len=strlen(w1->text);
	int w2_len=strlen(w2->text);
	if (w1_len == w2_len)
		return strcmp(w1->text, w2->text);
	return (w2_len - w1_len);
}

void
build_regexp(pcre **r, char *buffer) {
	const char *error;
	int error_offset;
	if (*r)
		pcre_free(*r);
	*r = pcre_compile(buffer, PCRE_CASELESS, &error, &error_offset, NULL);
	if (error) {
		log_error("pcre=%s, error=%s\n", buffer, error ? error: "none");
	}
	buffer[0] = '\0';
}

int
build_regexps(domain_t *domain) {
	char buffer[1024 * 128];
	buffer[0] = '\0';
	base_classification_t *bc;
	word_group_t *g;
	affix_t *a;
	word_t *w;
	size_t n_el, i;

	for (n_el = 0, bc = domain->base_classifications; bc; bc = bc->next) {
		n_el++;
	}

	char **sortable = calloc(n_el, sizeof(char *));
	if (!sortable) {
		log_error("allocation error %s", strerror(errno));
		return -1;
	}

	for (i=0, bc = domain->base_classifications; bc; bc = bc->next) {
		sortable[i++] = bc->trans;
	}

	qsort(sortable, n_el, sizeof(char *), string_size);

	for (i = 0; i < n_el; i++) {
		strcat(buffer, sortable[i]);
		if (i < n_el) strcat(buffer,"|");
	}

	free(sortable);

	log_debug(">>> %s classification regexp=%s\n", domain->name, buffer);
	build_regexp(&domain->base_classification_regexp, buffer);

	for (g = domain->groups; g; g = g->next) {
		if (g->prefixes) {
			strcat(buffer,"(?:");
			for (a = g->prefixes; a; a = a->next) {
				strcat(buffer, a->text);
				if (a->next) strcat(buffer,"|");
			}
			strcat(buffer,")");
			strcat(buffer,"[ 	]+");
			log_debug(">>> %s %s prefix regexp=%s\n", domain->name, g->name, buffer);
			build_regexp(&g->prefix_regexp, buffer);
		}

		if (g->prefixes)
			strcat(buffer, "^");
		strcat(buffer, "(?:");

		g->sword_len=0;
		for (w = g->words; w; w = w->next)
			g->sword_len++;

		g->sword = calloc(g->sword_len, sizeof(word_t *));
		if (!g->sword) {
			log_error("allocation error %s", strerror(errno));
			return -1;
		}

		i=0;
		for (w = g->words; w; w = w->next)
			g->sword[i++]=w;

		qsort(g->sword, g->sword_len, sizeof(word_t *), word_size);

		for (i=0; i < g->sword_len; i++) {
			if (i) strcat(buffer,"|");
			strcat(buffer,"\\b");
			strcat(buffer, g->sword[i]->text);
			strcat(buffer,"\\b");
		}

		if (g->whitespace) {
			strcat(buffer,"|[");
			strcat(buffer, g->whitespace);
			strcat(buffer, "]+");
		}

		strcat(buffer, ")+");
		if (g->suffixes)
			strcat(buffer, "$");

		log_debug(">>> %s %s modifier regexp=%s\n", domain->name, g->name, buffer);
		build_regexp(&g->word_regexp, buffer);
		if (g->suffixes) {
			strcat(buffer,"[ 	]+");
			strcat(buffer,"(?:");
			for (a = g->suffixes; a; a = a->next) {
				strcat(buffer, a->text);
				if (a->next) strcat(buffer,"|");
			}
			strcat(buffer,")");
			log_debug(">>> %s %s suffix regexp=%s\n", domain->name, g->name, buffer);
			build_regexp(&g->suffix_regexp, buffer);
		}
	}

	return 0;
}

char *
compute_raw_from_trans(const char *level, domain_t *domain) {

#ifdef DEBUG
	struct timeval startTime;
	gettimeofday(&startTime, 0);
#endif

	int rc = 0;
	int ovector[OVECCOUNT];
	word_group_t *g = NULL;
	char *work = NULL;
	char *r = NULL;
	const char * match = NULL;
	int work_len;
	mls_level_t *mraw = NULL;
	ebitmap_t set, clear, tmp;

	ebitmap_init(&set);
	ebitmap_init(&clear);
	ebitmap_init(&tmp);

	work = strdup(level);
	if (!work) {
		log_error("compute_raw_from_trans: allocation error %s", strerror(errno));
		goto err;
	}
	work_len = strlen(work);

	if (!domain->base_classification_regexp)
		if (build_regexps(domain) < 0)
			goto err;
	if (!domain->base_classification_regexp)
		goto err;
	log_debug(" compute_raw_from_trans work = %s\n", work);
	rc = pcre_exec(domain->base_classification_regexp, 0, work, work_len, 0, PCRE_ANCHORED, ovector, OVECCOUNT);
	if (rc > 0) {
		match = NULL;
		pcre_get_substring(work, ovector, rc, 0, &match);
		log_debug(" compute_raw_from_trans match = %s len = %u\n", match, strlen(match));
		base_classification_t *bc;
		for (bc = domain->base_classifications; bc; bc = bc->next) {
			if (!strcmp(bc->trans, match)) {
				log_debug(" compute_raw_from_trans base classification %s matched %s\n", level, bc->trans);
				mraw = malloc(sizeof(mls_level_t));
				if (!mraw) {
					log_error("allocation error %s", strerror(errno));
					goto err;
				}
				if (mls_level_cpy(mraw, bc->level) < 0)
					goto err;
				break;
			}
		}

		memset(work + ovector[0], '#', ovector[1] - ovector[0]);
		char *p=work + ovector[0] + ovector[1];
		while (*p && (strchr(" 	", *p) != NULL))
			*p++ = '#';
		pcre_free((char *)match);
		match = NULL;
	} else {
		log_debug(" compute_raw_from_trans no base classification matched %s\n", level);
	}

	if (mraw == NULL) {
		goto err;
	}

	int complete = 0;
	int change = 1;
	while(change && !complete) {
		change = 0;
		for (g = domain->groups; g && !change && !complete; g = g->next) {
			int prefix = 0, suffix = 0;
			int prefix_offset = 0, prefix_len = 0;
			int suffix_offset = 0, suffix_len = 0;
			if (g->prefix_regexp) {
				rc = pcre_exec(g->prefix_regexp, 0, work, work_len, 0, 0, ovector, OVECCOUNT);
				if (rc > 0) {
					prefix = 1;
					prefix_offset = ovector[0];
					prefix_len = ovector[1] - ovector[0];
				}
			}
			if (g->suffix_regexp) {
				rc = pcre_exec(g->suffix_regexp, 0, work, work_len, 0, 0, ovector, OVECCOUNT);
				if (rc > 0) {
					suffix = 1;
					suffix_offset = ovector[0];
					suffix_len = ovector[1] - ovector[0];
				}
			}

/* anchors prefix ^, suffix $ */
			if (((!g->prefixes && !g->suffixes) ||
			     (g->prefixes && prefix) ||
			     (g->suffixes && suffix)) &&
			     g->word_regexp) {
				char *s = work + prefix_offset + prefix_len;
				int l = (suffix_len ? suffix_offset : work_len) - prefix_len - prefix_offset;
				rc = pcre_exec(g->word_regexp, 0, s, l, 0, 0, ovector, OVECCOUNT);
				if (rc > 0) {
					match = NULL;
					pcre_get_substring(s, ovector, rc, 0, &match);
					trim((char *)match, g->whitespace);
					if (*match) {
						char *p = triml((char *)match, g->whitespace);
						while (p && *p) {
							int plen = strlen(p);
							unsigned int i;
							for (i = 0; i < g->sword_len; i++) {
								word_t *w = g->sword[i];
								int wlen = strlen(w->text);
								if (plen >= wlen && !strncmp(w->text, p, strlen(w->text))){
									if (ebitmap_andnot(&set, &w->cat, &g->def, maxbit) < 0) goto err;

									if (ebitmap_xor(&tmp, &w->cat, &g->def) < 0) goto err;
									if (ebitmap_and(&clear, &tmp, &g->def) < 0) goto err;
									if (ebitmap_union(&mraw->cat, &set) < 0) goto err;

									ebitmap_destroy(&tmp);
									if (ebitmap_cpy(&tmp, &mraw->cat) < 0) goto err;
									ebitmap_destroy(&mraw->cat);
									if (ebitmap_andnot(&mraw->cat, &tmp, &clear, maxbit) < 0) goto err;

									ebitmap_destroy(&tmp);
									ebitmap_destroy(&set);
									ebitmap_destroy(&clear);
									p += strlen(w->text);
									change++;
									break;
								}
							}
							if (i == g->sword_len) {
								syslog(LOG_ERR, "conversion error");
								break;
							}
							p = triml(p, g->whitespace);
						}
						memset(work + prefix_offset, '#', prefix_len);
						memset(work + suffix_offset, '#', suffix_len);
						memset(s + ovector[0], '#', ovector[1] - ovector[0]);
					}
					pcre_free((void *)match);
					match = NULL;
				}
			}
/* YYY */
			complete=1;
			char *p = work;
			while(*p) {
				if (isalnum(*p++)) {
					complete=0;
					break;
				}
			}
		}
	}
	free(work);
	if (violates_constraints(mraw)) {
		complete = 0;
	}
	if (complete)
		r = mls_level_to_string(mraw);
	mls_level_destroy(mraw);
	free(mraw);

#ifdef DEBUG
	struct timeval stopTime;
	gettimeofday(&stopTime, 0);
	long int ms;
	if (startTime.tv_usec > stopTime.tv_usec)
		ms = (stopTime.tv_sec - startTime.tv_sec - 1) * 1000 + (stopTime.tv_usec/1000 + 1000 - startTime.tv_usec/1000);
	else
		ms = (stopTime.tv_sec - startTime.tv_sec    ) * 1000 + (stopTime.tv_usec/1000        - startTime.tv_usec/1000);
	log_debug(" compute_raw_from_trans in %ld ms'\n", ms);
#endif

	return r;

err:
	mls_level_destroy(mraw);
	free(mraw);
	free(work);
	pcre_free((void *)match);
	ebitmap_destroy(&tmp);
	ebitmap_destroy(&set);
	ebitmap_destroy(&clear);
	return NULL;
}

char *
compute_trans_from_raw(const char *level, domain_t *domain) {

#ifdef DEBUG
	struct timeval startTime;
	gettimeofday(&startTime, 0);
#endif

	word_group_t *g;
	mls_level_t *l = NULL;
	char *rval = NULL;
	word_group_t *groups = NULL;
	ebitmap_t bit_diff, temp, handled, nothandled, unhandled, orig_unhandled;

	ebitmap_init(&bit_diff);
	ebitmap_init(&temp);
	ebitmap_init(&handled);
	ebitmap_init(&nothandled);
	ebitmap_init(&unhandled);
	ebitmap_init(&orig_unhandled);

	if (!level)
		goto err;
	
	l = parse_raw(level);
	if (!l)
		goto err;
	log_debug(" compute_trans_from_raw raw = %s\n", level);

/* YYY */
	/* check constraints */
	if (violates_constraints(l)) {
		syslog(LOG_ERR, "%s violates constraints", level);
		goto err;
	}

	int doInverse = l->sens > 0;

	base_classification_t *bc, *last = NULL;
	int done = 0;
	for (bc = domain->base_classifications; bc && !done; bc = bc->next) {
		if (l->sens == bc->level->sens) {
			/* skip if alias of last bc */
			if (last &&
			    last->level->sens == bc->level->sens &&
			    ebitmap_cmp(&last->level->cat, &bc->level->cat) == 0)
				continue;

			/* compute bits not consumed by base classification */
			if (ebitmap_xor(&unhandled, &l->cat, &bc->level->cat) < 0)
				goto err;
			if (ebitmap_cpy(&orig_unhandled, &unhandled) < 0)
				goto err;

			/* prebuild groups */
			for (g = domain->groups; g; g = g->next) {
				word_group_t **t;
				for (t = &groups; *t; t = &(*t)->next)
					if (!strcmp(g->name, (*t)->name))
						break;

				if (! *t) {
					word_group_t *wg = create_group(&groups, g->name);
					if (g->prefixes)
						if (append(&wg->prefixes, g->prefixes->text) < 0)
							goto err;
					if (g->suffixes)
						if (append(&wg->suffixes, g->suffixes->text) < 0)
							goto err;
					if (g->join)
						if (update(&wg->join, g->join) < 0)
							goto err;
				}
			}

			int loops, hamming, change=1;
			for (loops = 50; ebitmap_cardinality(&unhandled) && loops > 0 && change; loops--) {
				change = 0;
				hamming = 10000;
				if (ebitmap_xor(&handled, &unhandled, &orig_unhandled) < 0)
					goto err;
				if (ebitmap_not(&nothandled, &handled, maxbit) < 0)
					goto err;
				word_group_t *currentGroup = NULL;
				word_t *currentWord = NULL;
				for (g = domain->groups; g && hamming; g = g->next) {
					word_t *w;
					for (w = g->words; w && hamming; w = w->next) {
						int cardinality = ebitmap_cardinality(&w->normal);
						/* If the word is all inverse bits and the level does not have inverse bits - skip */
						if (cardinality && !doInverse) {
							continue;
						}

						/* if only unhandled bits are different */
						if (ebitmap_or(&temp, &w->normal, &w->inverse) < 0)
							goto err;
						if (ebitmap_and(&bit_diff, &temp, &nothandled) < 0)
							goto err;
						ebitmap_destroy(&temp);
// xor bit_diff handled?
						if (ebitmap_and(&temp, &bit_diff, &unhandled) < 0)
							goto err;
						if (ebitmap_cmp(&bit_diff, &temp)) {
							int h = ebitmap_hamming_distance(&bit_diff, &unhandled);
							if (h < hamming) {
								hamming = h;
								currentGroup = g;
								currentWord = w;
							}
						}
						ebitmap_destroy(&bit_diff);
						ebitmap_destroy(&temp);
					}
				}
				ebitmap_destroy(&handled);
				ebitmap_destroy(&nothandled);

				if (currentWord) {
					if (ebitmap_xor(&bit_diff, &currentWord->cat, &bc->level->cat) < 0)
						goto err;

					if (ebitmap_cpy(&temp, &unhandled) < 0)
						goto err;
					ebitmap_destroy(&unhandled);
					if (ebitmap_andnot(&unhandled, &temp, &bit_diff, maxbit) < 0)
						goto err;

					ebitmap_destroy(&bit_diff);
					ebitmap_destroy(&temp);

					word_group_t **t;
					for (t = &groups; *t; t = &(*t)->next)
						if (!strcmp(currentGroup->name, (*t)->name))
							break;
					create_word(&(*t)->words, currentWord->text);
					change++;
				}
			}

			done = (ebitmap_cardinality(&unhandled) == 0);
			ebitmap_destroy(&unhandled);
			ebitmap_destroy(&orig_unhandled);
			if (done) {
				char buffer[9999];
				buffer[0] = 0;
				strcat(buffer, bc->trans);
				strcat(buffer, " ");
				for (g=groups; g; g = g->next) {
					if (g->words && g->prefixes) {
						strcat(buffer, g->prefixes->text);
						strcat(buffer, " ");
					}
					word_t *w;
					for (w=g->words; w; w = w->next) {
						strcat(buffer, w->text);
						if (w->next)
							strcat(buffer, g->join);
					}
					if (g->words && g->suffixes) {
						strcat(buffer, " ");
						strcat(buffer, g->suffixes->text);
					}
					word_group_t *n = g->next;
					while(g->words && n) {
						if (n->words) {
							strcat(buffer, " ");
							break;
						}
						n = n->next;
					}
				}
				rval = strdup(buffer);
				if (!rval) {
					log_error("compute_trans_from_raw: allocation error %s", strerror(errno));
					goto err;
				}
			}
			/* clean up */
			while (groups)
				destroy_group(&groups, groups);
		}
		last = bc;
	}
	if (l) {
		mls_level_destroy(l);
		free(l);
	}

#ifdef DEBUG
	struct timeval stopTime;
	gettimeofday(&stopTime, 0);
	long int ms;
	if (startTime.tv_usec > stopTime.tv_usec)
		ms = (stopTime.tv_sec - startTime.tv_sec - 1) * 1000 + (stopTime.tv_usec/1000 + 1000 - startTime.tv_usec/1000);
	else
		ms = (stopTime.tv_sec - startTime.tv_sec    ) * 1000 + (stopTime.tv_usec/1000        - startTime.tv_usec/1000);

	log_debug(" compute_trans_from_raw in %ld ms'\n", ms);
#endif

	return rval;

err:
	while (groups)
		destroy_group(&groups, groups);
	mls_level_destroy(l);
	free(l);
	return NULL;
}

int
trans_context(const char *incon, char **rcon) {
	char *trans = NULL;
	*rcon = NULL;

#ifdef DEBUG
	struct timeval startTime;
	gettimeofday(&startTime, 0);
#endif

	log_debug(" trans_context input = %s\n", incon);
	char *range = extract_range(incon);
	if (!range) return -1;

	domain_t *domain = domains;
	for (;domain; domain = domain->next) {
		trans = find_in_hashtable(range, domain, domain->raw_to_trans);
		if (trans) break;

		/* try split and translate */
		char *lrange = NULL, *urange = NULL;
		char *ltrans = NULL, *utrans = NULL;
		char *dashp = strchr(range,'-');
		if (dashp) {
			*dashp = 0;
			lrange = range;
			urange = dashp+1;
		} else {
			trans = compute_trans_from_raw(range, domain);
			if (trans)
				if (add_cache(domain, range, trans) < 0) {
					free(range);
					return -1;
				}
		}

		if (lrange && urange) {
			ltrans = find_in_hashtable(lrange, domain, domain->raw_to_trans);
			if (! ltrans) {
				ltrans = compute_trans_from_raw(lrange, domain);
				if (ltrans) {
					if (add_cache(domain, lrange, ltrans) < 0) {
						free(range);
						return -1;
					}
				} else {
					ltrans = strdup(lrange);
					if (! ltrans) {
						log_error("strdup failed %s", strerror(errno));
						free(range);
						return -1;
					}
				}
			}

			utrans = find_in_hashtable(urange, domain, domain->raw_to_trans);
			if (! utrans) {
				utrans = compute_trans_from_raw(urange, domain);
				if (utrans) {
					if (add_cache(domain, urange, utrans) < 0) {
						free(ltrans);
						free(range);
						return -1;
					}
				} else {
					utrans = strdup(urange);
					if (! utrans) {
						log_error("strdup failed %s", strerror(errno));
						free(ltrans);
						free(range);
						return -1;
					}
				}
			}

			if (strcmp(ltrans, utrans) == 0) {
				if (asprintf(&trans, "%s", ltrans) < 0) {
					log_error("asprintf failed %s", strerror(errno));
					free(utrans);
					free(ltrans);
					free(range);
					return -1;
				}
			} else {
				if (asprintf(&trans, "%s-%s", ltrans, utrans) < 0) {
					log_error("asprintf failed %s", strerror(errno));
					free(utrans);
					free(ltrans);
					free(range);
					return -1;
				}
			}
			free(ltrans);
			free(utrans);
			*dashp = '-';
			break;
		}
		if (dashp)
			*dashp = '-';
	}

	if (trans) {
		*rcon = new_context_str(incon, trans);
		free(trans);
	} else {
		*rcon = new_context_str(incon, range);
	}
	free(range);

#ifdef DEBUG
	struct timeval stopTime;
	gettimeofday(&stopTime, 0);
	long int ms;
	if (startTime.tv_usec > stopTime.tv_usec)
		ms = (stopTime.tv_sec - startTime.tv_sec - 1) * 1000 + (stopTime.tv_usec/1000 + 1000 - startTime.tv_usec/1000);
	else
		ms = (stopTime.tv_sec - startTime.tv_sec    ) * 1000 + (stopTime.tv_usec/1000        - startTime.tv_usec/1000);

	log_debug(" trans_context input='%s' output='%s in %ld ms'\n", incon, *rcon, ms);
#endif
	return 0;
}

int
untrans_context(const char *incon, char **rcon) {
	char *raw = NULL;
	*rcon = NULL;

#ifdef DEBUG
	struct timeval startTime;
	gettimeofday(&startTime, 0);
#endif

	log_debug(" untrans_context incon = %s\n", incon);
	char *range = extract_range(incon);
	if (!range) return -1;
	log_debug(" untrans_context range = %s\n", range);

	domain_t *domain = domains;
	for (;domain; domain = domain->next) {
		raw = find_in_hashtable(range, domain, domain->trans_to_raw);
		if (raw) break;

		/* try split and translate */
		char *lrange = NULL, *urange = NULL;
		char *lraw = NULL, *uraw = NULL;
		char *dashp = strchr(range,'-');
		if (dashp) {
			*dashp = 0;
			lrange = range;
			urange = dashp+1;
		} else {
			raw = compute_raw_from_trans(range, domain);
			if (raw) {
				char *canonical = find_in_hashtable(raw, domain, domain->raw_to_trans);
				if (!canonical) {
					canonical = compute_trans_from_raw(raw, domain);
					if (canonical && strcmp(canonical, range))
						if (add_cache(domain, raw, canonical) < 0) {
							free(range);
							return -1;
						}
				}
				if (canonical)
					free(canonical);
				if (add_cache(domain, raw, range) < 0) {
					free(range);
					return -1;
				}
			} else {
				log_debug("untrans_context unable to compute raw context %s\n", range);
			}
		}

		if (lrange && urange) {
			lraw = find_in_hashtable(lrange, domain, domain->trans_to_raw);
			if (! lraw) {
				lraw = compute_raw_from_trans(lrange, domain);
				if (lraw) {
					char *canonical = find_in_hashtable(lraw, domain, domain->raw_to_trans);
					if (!canonical) {
						canonical = compute_trans_from_raw(lraw, domain);
						if (canonical)
							if (add_cache(domain, lraw, canonical) < 0) {
								free(lraw);
								free(range);
								return -1;
							}
					}
					if (canonical)
						free(canonical);
					if (add_cache(domain, lraw, lrange) < 0) {
						free(lraw);
						free(range);
						return -1;
					}
				} else {
					lraw = strdup(lrange);
					if (! lraw) {
						log_error("strdup failed %s", strerror(errno));
						free(range);
						return -1;
					}
				}
			}

			uraw = find_in_hashtable(urange, domain, domain->trans_to_raw);
			if (! uraw) {
				uraw = compute_raw_from_trans(urange, domain);
				if (uraw) {
					char *canonical = find_in_hashtable(uraw, domain, domain->raw_to_trans);
					if (!canonical) {
						canonical = compute_trans_from_raw(uraw, domain);
						if (canonical)
							if (add_cache(domain, uraw, canonical) < 0) {
								free(uraw);
								free(lraw);
								free(range);
								return -1;
							}
					}
					if (canonical)
						free(canonical);
					if (add_cache(domain, uraw, urange) < 0) {
						free(uraw);
						free(lraw);
						free(range);
						return -1;
					}
				} else {
					uraw = strdup(urange);
					if (! uraw) {
						log_error("strdup failed %s", strerror(errno));
						free(lraw);
						free(range);
						return -1;
					}
				}
			}


			if (strcmp(lraw, uraw) == 0) {
				if (asprintf(&raw, "%s", lraw) < 0) {
					log_error("asprintf failed %s", strerror(errno));
					free(uraw);
					free(lraw);
					free(range);
					return -1;
				}
			} else {
				if (asprintf(&raw, "%s-%s", lraw, uraw) < 0) {
					log_error("asprintf failed %s", strerror(errno));
					free(uraw);
					free(lraw);
					free(range);
					return -1;
				}
			}
			free(lraw);
			free(uraw);
			*dashp = '-';
			break;
		}
		if (dashp)
			*dashp = '-';
	}

	if (raw) {
		*rcon = new_context_str(incon, raw);
		free(raw);
	} else {
		*rcon = new_context_str(incon, range);
	}
	free(range);

#ifdef DEBUG
	struct timeval stopTime;
	gettimeofday(&stopTime, 0);
	long int ms;
	if (startTime.tv_usec > stopTime.tv_usec)
		ms = (stopTime.tv_sec - startTime.tv_sec - 1) * 1000 + (stopTime.tv_usec/1000 + 1000 - startTime.tv_usec/1000);
	else
		ms = (stopTime.tv_sec - startTime.tv_sec    ) * 1000 + (stopTime.tv_usec/1000        - startTime.tv_usec/1000);

	log_debug(" untrans_context input='%s' output='%s' n %ld ms\n", incon, *rcon, ms);
#endif
	return 0;
}

void
finish_context_translations(void) {
	while(domains) {
		domain_t *next = domains->next;
		destroy_domain(domains);
		domains = next;
	}
	while(sens_constraints) {
		sens_constraint_t *next = sens_constraints->next;
		destroy_sens_constraint(&sens_constraints, sens_constraints);
		sens_constraints = next;
	}
	while(cat_constraints) {
		cat_constraint_t *next = cat_constraints->next;
		destroy_cat_constraint(&cat_constraints, cat_constraints);
		cat_constraints = next;
	}
}

