| /* |
| * utils.c for libdivsufsort |
| * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person |
| * obtaining a copy of this software and associated documentation |
| * files (the "Software"), to deal in the Software without |
| * restriction, including without limitation the rights to use, |
| * copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following |
| * conditions: |
| * |
| * The above copyright notice and this permission notice shall be |
| * included in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include "divsufsort_private.h" |
| |
| |
| /*- Private Function -*/ |
| |
| /* Binary search for inverse bwt. */ |
| static |
| saidx_t |
| binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { |
| saidx_t half, i; |
| for(i = 0, half = size >> 1; |
| 0 < size; |
| size = half, half >>= 1) { |
| if(A[i + half] < value) { |
| i += half + 1; |
| half -= (size & 1) ^ 1; |
| } |
| } |
| return i; |
| } |
| |
| |
| /*- Functions -*/ |
| |
| /* Burrows-Wheeler transform. */ |
| saint_t |
| bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, |
| saidx_t n, saidx_t *idx) { |
| saidx_t *A, i, j, p, t; |
| saint_t c; |
| |
| /* Check arguments. */ |
| if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } |
| if(n <= 1) { |
| if(n == 1) { U[0] = T[0]; } |
| *idx = n; |
| return 0; |
| } |
| |
| if((A = SA) == NULL) { |
| i = divbwt(T, U, NULL, n); |
| if(0 <= i) { *idx = i; i = 0; } |
| return (saint_t)i; |
| } |
| |
| /* BW transform. */ |
| if(T == U) { |
| t = n; |
| for(i = 0, j = 0; i < n; ++i) { |
| p = t - 1; |
| t = A[i]; |
| if(0 <= p) { |
| c = T[j]; |
| U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; |
| A[j] = c; |
| j++; |
| } else { |
| *idx = i; |
| } |
| } |
| p = t - 1; |
| if(0 <= p) { |
| c = T[j]; |
| U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; |
| A[j] = c; |
| } else { |
| *idx = i; |
| } |
| } else { |
| U[0] = T[n - 1]; |
| for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } |
| *idx = i + 1; |
| for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } |
| } |
| |
| if(SA == NULL) { |
| /* Deallocate memory. */ |
| free(A); |
| } |
| |
| return 0; |
| } |
| |
| /* Inverse Burrows-Wheeler transform. */ |
| saint_t |
| inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, |
| saidx_t n, saidx_t idx) { |
| saidx_t C[ALPHABET_SIZE]; |
| sauchar_t D[ALPHABET_SIZE]; |
| saidx_t *B; |
| saidx_t i, p; |
| saint_t c, d; |
| |
| /* Check arguments. */ |
| if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || |
| (n < idx) || ((0 < n) && (idx == 0))) { |
| return -1; |
| } |
| if(n <= 1) { return 0; } |
| |
| if((B = A) == NULL) { |
| /* Allocate n*sizeof(saidx_t) bytes of memory. */ |
| if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } |
| } |
| |
| /* Inverse BW transform. */ |
| for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } |
| for(i = 0; i < n; ++i) { ++C[T[i]]; } |
| for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { |
| p = C[c]; |
| if(0 < p) { |
| C[c] = i; |
| D[d++] = (sauchar_t)c; |
| i += p; |
| } |
| } |
| for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } |
| for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } |
| for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } |
| for(i = 0, p = idx; i < n; ++i) { |
| U[i] = D[binarysearch_lower(C, d, p)]; |
| p = B[p - 1]; |
| } |
| |
| if(A == NULL) { |
| /* Deallocate memory. */ |
| free(B); |
| } |
| |
| return 0; |
| } |
| |
| /* Checks the suffix array SA of the string T. */ |
| saint_t |
| sufcheck(const sauchar_t *T, const saidx_t *SA, |
| saidx_t n, saint_t verbose) { |
| saidx_t C[ALPHABET_SIZE]; |
| saidx_t i, p, q, t; |
| saint_t c; |
| |
| if(verbose) { fprintf(stderr, "sufcheck: "); } |
| |
| /* Check arguments. */ |
| if((T == NULL) || (SA == NULL) || (n < 0)) { |
| if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } |
| return -1; |
| } |
| if(n == 0) { |
| if(verbose) { fprintf(stderr, "Done.\n"); } |
| return 0; |
| } |
| |
| /* check range: [0..n-1] */ |
| for(i = 0; i < n; ++i) { |
| if((SA[i] < 0) || (n <= SA[i])) { |
| if(verbose) { |
| fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" |
| " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", |
| n - 1, i, SA[i]); |
| } |
| return -2; |
| } |
| } |
| |
| /* check first characters. */ |
| for(i = 1; i < n; ++i) { |
| if(T[SA[i - 1]] > T[SA[i]]) { |
| if(verbose) { |
| fprintf(stderr, "Suffixes in wrong order.\n" |
| " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" |
| " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", |
| i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); |
| } |
| return -3; |
| } |
| } |
| |
| /* check suffixes. */ |
| for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } |
| for(i = 0; i < n; ++i) { ++C[T[i]]; } |
| for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { |
| t = C[i]; |
| C[i] = p; |
| p += t; |
| } |
| |
| q = C[T[n - 1]]; |
| C[T[n - 1]] += 1; |
| for(i = 0; i < n; ++i) { |
| p = SA[i]; |
| if(0 < p) { |
| c = T[--p]; |
| t = C[c]; |
| } else { |
| c = T[p = n - 1]; |
| t = q; |
| } |
| if((t < 0) || (p != SA[t])) { |
| if(verbose) { |
| fprintf(stderr, "Suffix in wrong position.\n" |
| " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" |
| " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", |
| t, (0 <= t) ? SA[t] : -1, i, SA[i]); |
| } |
| return -4; |
| } |
| if(t != q) { |
| ++C[c]; |
| if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } |
| } |
| } |
| |
| if(1 <= verbose) { fprintf(stderr, "Done.\n"); } |
| return 0; |
| } |
| |
| |
| static |
| int |
| _compare(const sauchar_t *T, saidx_t Tsize, |
| const sauchar_t *P, saidx_t Psize, |
| saidx_t suf, saidx_t *match) { |
| saidx_t i, j; |
| saint_t r; |
| for(i = suf + *match, j = *match, r = 0; |
| (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } |
| *match = j; |
| return (r == 0) ? -(j != Psize) : r; |
| } |
| |
| /* Search for the pattern P in the string T. */ |
| saidx_t |
| sa_search(const sauchar_t *T, saidx_t Tsize, |
| const sauchar_t *P, saidx_t Psize, |
| const saidx_t *SA, saidx_t SAsize, |
| saidx_t *idx) { |
| saidx_t size, lsize, rsize, half; |
| saidx_t match, lmatch, rmatch; |
| saidx_t llmatch, lrmatch, rlmatch, rrmatch; |
| saidx_t i, j, k; |
| saint_t r; |
| |
| if(idx != NULL) { *idx = -1; } |
| if((T == NULL) || (P == NULL) || (SA == NULL) || |
| (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } |
| if((Tsize == 0) || (SAsize == 0)) { return 0; } |
| if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } |
| |
| for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; |
| 0 < size; |
| size = half, half >>= 1) { |
| match = MIN(lmatch, rmatch); |
| r = _compare(T, Tsize, P, Psize, SA[i + half], &match); |
| if(r < 0) { |
| i += half + 1; |
| half -= (size & 1) ^ 1; |
| lmatch = match; |
| } else if(r > 0) { |
| rmatch = match; |
| } else { |
| lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; |
| |
| /* left part */ |
| for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; |
| 0 < lsize; |
| lsize = half, half >>= 1) { |
| lmatch = MIN(llmatch, lrmatch); |
| r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); |
| if(r < 0) { |
| j += half + 1; |
| half -= (lsize & 1) ^ 1; |
| llmatch = lmatch; |
| } else { |
| lrmatch = lmatch; |
| } |
| } |
| |
| /* right part */ |
| for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; |
| 0 < rsize; |
| rsize = half, half >>= 1) { |
| rmatch = MIN(rlmatch, rrmatch); |
| r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); |
| if(r <= 0) { |
| k += half + 1; |
| half -= (rsize & 1) ^ 1; |
| rlmatch = rmatch; |
| } else { |
| rrmatch = rmatch; |
| } |
| } |
| |
| break; |
| } |
| } |
| |
| if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } |
| return k - j; |
| } |
| |
| /* Search for the character c in the string T. */ |
| saidx_t |
| sa_simplesearch(const sauchar_t *T, saidx_t Tsize, |
| const saidx_t *SA, saidx_t SAsize, |
| saint_t c, saidx_t *idx) { |
| saidx_t size, lsize, rsize, half; |
| saidx_t i, j, k, p; |
| saint_t r; |
| |
| if(idx != NULL) { *idx = -1; } |
| if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } |
| if((Tsize == 0) || (SAsize == 0)) { return 0; } |
| |
| for(i = j = k = 0, size = SAsize, half = size >> 1; |
| 0 < size; |
| size = half, half >>= 1) { |
| p = SA[i + half]; |
| r = (p < Tsize) ? T[p] - c : -1; |
| if(r < 0) { |
| i += half + 1; |
| half -= (size & 1) ^ 1; |
| } else if(r == 0) { |
| lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; |
| |
| /* left part */ |
| for(half = lsize >> 1; |
| 0 < lsize; |
| lsize = half, half >>= 1) { |
| p = SA[j + half]; |
| r = (p < Tsize) ? T[p] - c : -1; |
| if(r < 0) { |
| j += half + 1; |
| half -= (lsize & 1) ^ 1; |
| } |
| } |
| |
| /* right part */ |
| for(half = rsize >> 1; |
| 0 < rsize; |
| rsize = half, half >>= 1) { |
| p = SA[k + half]; |
| r = (p < Tsize) ? T[p] - c : -1; |
| if(r <= 0) { |
| k += half + 1; |
| half -= (rsize & 1) ^ 1; |
| } |
| } |
| |
| break; |
| } |
| } |
| |
| if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } |
| return k - j; |
| } |