It uses only the first table from Boyer-Moore, which is why it's easy to implement.
First, here's the function which build the table:
1: const int SIZE = 255;
2:
3: void computeTable(int arr[], char* pattern, int len) {
4: for (int i = 0; i < SIZE; i++) {
5: arr[i] = len;
6: }
7: for (int i = 0; i < len - 1; i++) {
8: arr[pattern[i]] = len - i - 1;
9: }
10: }
And the actual string matching uses the table which we build above:
1: int findOccurences(char* pattern, int patternLen, char* str, int strLen, int* table) {
2: int i = 0;
3: int pos = 0;
4: int last = patternLen - 1;
5: while (patternLen <= strLen) {
6: for (i = last; str[i] == pattern[i] && i >= 0; i--);
7: if (i == -1)
8: //return pos;
9: cout << "found one at position: " << pos << endl;
10: if (strLen <= table[str[last]])
11: return -1;
12: int offset = table[str[last]];
13: str += offset;
14: strLen -= offset;
15: pos += offset;
16: }
17: return -1;
18: }
You can use it simply by typing this:
1: int main() {
2: char pattern[] = "gcagagag";
3: char str[] = "gcatcgcagagagtatacagtacg";
4: int patternSize = sizeof(pattern) - 1;
5: int strSize = sizeof(str) - 1;
6: int arr[SIZE];
7: computeTable(arr, pattern, patternSize);
8: cout << "Finding occurences... " << endl;
9: cout << findOccurences(pattern, patternSize, str, strSize, arr) << endl;
10: return 0;
11: }
No comments:
Post a Comment