Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aho corasick Algorithm Added #42

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions Aho-Corasick/Aho_Corasick.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// C++ program for implementation of Aho Corasick algorithm
// for string matching
using namespace std;
#include <bits/stdc++.h>

// Max number of states in the matching machine.
// Should be equal to the sum of the length of all keywords.
const int MAXS = 500;

// Maximum number of characters in input alphabet
const int MAXC = 26;

// OUTPUT FUNCTION IS IMPLEMENTED USING out[]
// Bit i in this mask is one if the word with index i
// appears when the machine enters this state.
int out[MAXS];

// FAILURE FUNCTION IS IMPLEMENTED USING f[]
int f[MAXS];

// GOTO FUNCTION (OR TRIE) IS IMPLEMENTED USING g[][]
int g[MAXS][MAXC];

// Builds the string matching machine.
// arr - array of words. The index of each keyword is important:
// "out[state] & (1 << i)" is > 0 if we just found word[i]
// in the text.
// Returns the number of states that the built machine has.
// States are numbered 0 up to the return value - 1, inclusive.
int buildMatchingMachine(string arr[], int k)
{
// Initialize all values in output function as 0.
memset(out, 0, sizeof out);

// Initialize all values in goto function as -1.
memset(g, -1, sizeof g);

// Initially, we just have the 0 state
int states = 1;

// Construct values for goto function, i.e., fill g[][]
// This is same as building a Trie for arr[]
for (int i = 0; i < k; ++i)
{
const string &word = arr[i];
int currentState = 0;

// Insert all characters of current word in arr[]
for (int j = 0; j < word.size(); ++j)
{
int ch = word[j] - 'a';

// Allocate a new node (create a new state) if a
// node for ch doesn't exist.
if (g[currentState][ch] == -1)
g[currentState][ch] = states++;

currentState = g[currentState][ch];
}

// Add current word in output function
out[currentState] |= (1 << i);
}

// For all characters which don't have an edge from
// root (or state 0) in Trie, add a goto edge to state
// 0 itself
for (int ch = 0; ch < MAXC; ++ch)
if (g[0][ch] == -1)
g[0][ch] = 0;

// Now, let's build the failure function

// Initialize values in fail function
memset(f, -1, sizeof f);

// Failure function is computed in breadth first order
// using a queue
queue<int> q;

// Iterate over every possible input
for (int ch = 0; ch < MAXC; ++ch)
{
// All nodes of depth 1 have failure function value
// as 0. For example, in above diagram we move to 0
// from states 1 and 3.
if (g[0][ch] != 0)
{
f[g[0][ch]] = 0;
q.push(g[0][ch]);
}
}

// Now queue has states 1 and 3
while (q.size())
{
// Remove the front state from queue
int state = q.front();
q.pop();

// For the removed state, find failure function for
// all those characters for which goto function is
// not defined.
for (int ch = 0; ch <= MAXC; ++ch)
{
// If goto function is defined for character 'ch'
// and 'state'
if (g[state][ch] != -1)
{
// Find failure state of removed state
int failure = f[state];

// Find the deepest node labeled by proper
// suffix of string from root to current
// state.
while (g[failure][ch] == -1)
failure = f[failure];

failure = g[failure][ch];
f[g[state][ch]] = failure;

// Merge output values
out[g[state][ch]] |= out[failure];

// Insert the next level node (of Trie) in Queue
q.push(g[state][ch]);
}
}
}

return states;
}

// Returns the next state the machine will transition to using goto
// and failure functions.
// currentState - The current state of the machine. Must be between
// 0 and the number of states - 1, inclusive.
// nextInput - The next character that enters into the machine.
int findNextState(int currentState, char nextInput)
{
int answer = currentState;
int ch = nextInput - 'a';

// If goto is not defined, use failure function
while (g[answer][ch] == -1)
answer = f[answer];

return g[answer][ch];
}

// This function finds all occurrences of all array words
// in text.
void searchWords(string arr[], int k, string text)
{
// Preprocess patterns.
// Build machine with goto, failure and output functions
buildMatchingMachine(arr, k);

// Initialize current state
int currentState = 0;

// Traverse the text through the nuilt machine to find
// all occurrences of words in arr[]
for (int i = 0; i < text.size(); ++i)
{
currentState = findNextState(currentState, text[i]);

// If match not found, move to next state
if (out[currentState] == 0)
continue;

// Match found, print all matching words of arr[]
// using output function.
for (int j = 0; j < k; ++j)
{
if (out[currentState] & (1 << j))
{
cout << "Word " << arr[j] << " appears from "
<< i - arr[j].size() + 1 << " to " << i << endl;
}
}
}
}

// Driver program to test above
int main()
{
string arr[] = {"he", "she", "hers", "his"};
string text = "ahishers";
int k = sizeof(arr) / sizeof(arr[0]);

searchWords(arr, k, text);

return 0;
}
92 changes: 92 additions & 0 deletions Dijkstra’s shortest path algorithm/Dijkstra.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// A C++ program for Dijkstra's single source shortest path algorithm.
// The program is for adjacency matrix representation of the graph

#include <stdio.h>
#include <limits.h>

// Number of vertices in the graph
#define V 9

// A utility function to find the vertex with minimum distance value, from
// the set of vertices not yet included in shortest path tree
int minDistance(int dist[], bool sptSet[])
{
// Initialize min value
int min = INT_MAX, min_index;

for (int v = 0; v < V; v++)
if (sptSet[v] == false && dist[v] <= min)
min = dist[v], min_index = v;

return min_index;
}

// A utility function to print the constructed distance array
int printSolution(int dist[], int n)
{
printf("Vertex Distance from Source\n");
for (int i = 0; i < V; i++)
printf("%d tt %d\n", i, dist[i]);
}

// Function that implements Dijkstra's single source shortest path algorithm
// for a graph represented using adjacency matrix representation
void dijkstra(int graph[V][V], int src)
{
int dist[V]; // The output array. dist[i] will hold the shortest
// distance from src to i

bool sptSet[V]; // sptSet[i] will true if vertex i is included in shortest
// path tree or shortest distance from src to i is finalized

// Initialize all distances as INFINITE and stpSet[] as false
for (int i = 0; i < V; i++)
dist[i] = INT_MAX, sptSet[i] = false;

// Distance of source vertex from itself is always 0
dist[src] = 0;

// Find shortest path for all vertices
for (int count = 0; count < V-1; count++)
{
// Pick the minimum distance vertex from the set of vertices not
// yet processed. u is always equal to src in the first iteration.
int u = minDistance(dist, sptSet);

// Mark the picked vertex as processed
sptSet[u] = true;

// Update dist value of the adjacent vertices of the picked vertex.
for (int v = 0; v < V; v++)

// Update dist[v] only if is not in sptSet, there is an edge from
// u to v, and total weight of path from src to v through u is
// smaller than current value of dist[v]
if (!sptSet[v] && graph[u][v] && dist[u] != INT_MAX
&& dist[u]+graph[u][v] < dist[v])
dist[v] = dist[u] + graph[u][v];
}

// print the constructed distance array
printSolution(dist, V);
}

// driver program to test above function
int main()
{
/* Let us create the example graph discussed above */
int graph[V][V] = {{0, 4, 0, 0, 0, 0, 0, 8, 0},
{4, 0, 8, 0, 0, 0, 0, 11, 0},
{0, 8, 0, 7, 0, 4, 0, 0, 2},
{0, 0, 7, 0, 9, 14, 0, 0, 0},
{0, 0, 0, 9, 0, 10, 0, 0, 0},
{0, 0, 4, 14, 10, 0, 2, 0, 0},
{0, 0, 0, 0, 0, 2, 0, 1, 6},
{8, 11, 0, 0, 0, 0, 1, 0, 7},
{0, 0, 2, 0, 0, 0, 6, 7, 0}
};

dijkstra(graph, 0);

return 0;
}
95 changes: 95 additions & 0 deletions KMP/kmp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// C++ program for implementation of KMP pattern searching
// algorithm
#include <bits/stdc++.h>

void computeLPSArray(char *pat, int M, int *lps);

// Prints occurrences of txt[] in pat[]
void KMPSearch(char *pat, char *txt)
{
int M = strlen(pat);
int N = strlen(txt);

// create lps[] that will hold the longest prefix suffix
// values for pattern
int lps[M];

// Preprocess the pattern (calculate lps[] array)
computeLPSArray(pat, M, lps);

int i = 0; // index for txt[]
int j = 0; // index for pat[]
while (i < N)
{
if (pat[j] == txt[i])
{
j++;
i++;
}

if (j == M)
{
printf("Found pattern at index %d ", i - j);
j = lps[j - 1];
}

// mismatch after j matches
else if (i < N && pat[j] != txt[i])
{
// Do not match lps[0..lps[j-1]] characters,
// they will match anyway
if (j != 0)
j = lps[j - 1];
else
i = i + 1;
}
}
}

// Fills lps[] for given patttern pat[0..M-1]
void computeLPSArray(char *pat, int M, int *lps)
{
// length of the previous longest prefix suffix
int len = 0;

lps[0] = 0; // lps[0] is always 0

// the loop calculates lps[i] for i = 1 to M-1
int i = 1;
while (i < M)
{
if (pat[i] == pat[len])
{
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
// This is tricky. Consider the example.
// AAACAAAA and i = 7. The idea is similar
// to search step.
if (len != 0)
{
len = lps[len - 1];

// Also, note that we do not increment
// i here
}
else // if (len == 0)
{
lps[i] = 0;
i++;
}
}
}
}

// Driver program to test above function
int main()
{
char txt[] = "ABABDABACDABABCABAB";
char pat[] = "ABABCABAB";
KMPSearch(pat, txt);
return 0;
}