/*
 * Boyer-Moore string search with comparison-by-comparison diagnostics.
 *
 * The code is written in the common subset of C and C++ so it builds with
 * either compiler.  Define BMSEARCH_NO_MAIN to omit the demonstration
 * driver when these routines are embedded in another program.
 */
#include <stdio.h>
#include <string.h>

enum {
    MaxPatternLength = 100, /* longest pattern the shift table can hold */
    AlphabetSize = 256      /* one skip entry per possible byte value */
};

/* Returns the larger of a and b. */
static inline int max(int a, int b)
{
    return a > b ? a : b;
}

/* tools for diagnostics and illustration */
int count; /* number of character comparisons used by the last search */
int count_init(void);
int count_n_show(const char *a, const char *p, int i, int j);

/* tools for the two string search heuristics used in the Boyer-Moore alg. */
int skip[AlphabetSize];
void initskip(const char *p);
int shift[MaxPatternLength];
void initshift(const char *p);

/*
 * Boyer-Moore string search.
 *
 * Returns the index of the first occurrence of string p in a, or the
 * length of a if p does not occur in a.  An empty p matches at index 0.
 * Every character comparison is counted and printed by count_n_show().
 *
 * Patterns longer than MaxPatternLength do not fit the shift table; for
 * those the search relies on the skip (bad character) heuristic alone,
 * with a minimum advance of one position, which is slower but correct.
 * Lengths are held in int, so both strings must be shorter than INT_MAX.
 */
int bmsearch(const char *p, const char *a)
{
    int M = (int)strlen(p);
    int M1 = M - 1;
    int N = (int)strlen(a);
    int have_shift = (M <= MaxPatternLength);
    int right_end = M1; /* position in a aligned with the last char of p */

    initskip(p);
    initshift(p);
    count_init();

    if (M == 0)
        return 0;

    while (right_end < N) {
        /* 1. Get the offset from the right of the first non-match. */
        int i = 0;
        while (i < M
               && count_n_show(a, p, right_end - i, M1 - i)
               && a[right_end - i] == p[M1 - i])
            i++;
        if (i == M)
            return right_end - M1; /* because a match has been found */

        /* 2. Skip that aligns the mismatched character of a with its
              rightmost occurrence in p (or moves p wholly past it).  The
              cast keeps the index non-negative where char is signed. */
        int sk = skip[(unsigned char)a[right_end - i]];

        /* 3. Shift that realigns the i matched characters with their next
              possible occurrence in p. */
        int sh = have_shift ? shift[i] : 1;

        /* 4. Take the larger move; sh >= 1 guarantees progress even when
              the skip rule would move the pattern backwards. */
        right_end = max(right_end - i + sk, right_end + sh);
    }
    return N; /* because no match was found */
}

/*
 * Initializes the skip array for pattern p.
 *
 * skip[c] is the distance from the right end of p to the rightmost
 * occurrence of byte c in p, or the length of p if c does not occur.
 */
void initskip(const char *p)
{
    int M = (int)strlen(p);

    for (int c = 0; c < AlphabetSize; c++)
        skip[c] = M;
    for (int i = 0; i < M; i++)
        skip[(unsigned char)p[i]] = M - 1 - i;
}

/*
 * Initializes the shift array for the "pattern" string p.
 *
 * shift[i] is the smallest move to the right that is still consistent with
 * the last i characters of p having matched the text and the character
 * before them having mismatched.  Patterns longer than MaxPatternLength
 * leave the table untouched (bmsearch does not consult it in that case).
 */
void initshift(const char *p)
{
    int M = (int)strlen(p);
    int M1 = M - 1;
    int len[MaxPatternLength];
    int i, j, s;

    if (M > MaxPatternLength)
        return;
    if (M == 0) {
        shift[0] = 1;
        return;
    }

    /* 1. len[s] = number of characters ending at p[M1 - s] that match the
          characters ending at p[M1].  The comparison stops at the start of
          the pattern, so len[s] <= M - s. */
    for (s = 1; s < M; s++) {
        j = 0;
        while (j < M - s && p[M1 - j] == p[M1 - s - j])
            j++;
        len[s] = j;
    }

    /* 2. If the matched characters occur nowhere else in p, moving the
          whole pattern past them is the proper shift. */
    for (i = 0; i < M; i++)
        shift[i] = M;

    /* 3. Fix up with the nearest reoccurrence of the matched characters.
          Going from large s to small s leaves the smallest shift in place. */
    for (s = M1; s > 0; s--)
        shift[len[s]] = s;

    /* 4. Fix up for reoccurrences that run off the start of the pattern:
          when the first M - s characters of p equal its last M - s
          characters, a shift of s is valid for every match count of at
          least M - s.  Never replace a smaller shift already found. */
    for (s = 1; s < M; s++) {
        if (len[s] != M - s)
            continue;
        for (i = M - s; i < M; i++) {
            if (s < shift[i])
                shift[i] = s;
        }
    }
}

/* Resets the comparison counter before a search.  Returns 0. */
int count_init(void)
{
    count = 0;
    return 0;
}

/*
 * Diagnostics of search: counts one comparison and prints the two
 * characters about to be compared, a[i] and p[j].  Always returns 1 (true)
 * so it can sit inside the comparison loop's condition.
 */
int count_n_show(const char *a, const char *p, int i, int j)
{
    count++;
    printf("comparison of a[%d] = %c with p[%d] = %c\n", i, a[i], j, p[j]);
    return 1;
}

#ifndef BMSEARCH_NO_MAIN
/* Prints the pattern and text, runs one search and reports its result. */
static void show_search(const char *lead, const char *p, const char *a)
{
    printf("%sGiven pattern \n\"%s\" and text \n\"%s\"\n", lead, p, a);
    int index = bmsearch(p, a);
    printf("bmsearch returns %d, M is %zu, N is %zu\n",
           index, strlen(p), strlen(a));
}

/* tests bmsearch */
int main(void)
{
    const char *p = "bad";

    show_search("\n", p,
                "Tad had had \"had\" where Gladis had had \"had had.\" Too bad Tad hadn't had \"had had.\"\n");
    show_search(" ", p, "hello, world");
    show_search(" ", p, "Goodbye, cruel world");
    return 0;
}
#endif /* BMSEARCH_NO_MAIN */