LexicalAnalyzer_constant.py
import re

import nltk

# Regular expressions for C/C++ keywords and for integer constants.
RE_Keywords = (
    "auto|break|case|char|const|continue|default|do|double|else|enum|extern|"
    "float|for|goto|if|int|long|register|return|short|signed|sizeof|static|"
    "struct|switch|typedef|union|unsigned|void|volatile|while|string|class|"
    "struc|include"
)
RE_Constant = r"^(\d+)$"

Keywords_Output = []
Constant_Output = []


def remove_Spaces(lines):
    # Strip leading/trailing whitespace from every line and drop blank lines.
    scanned_Program = []
    for line in lines:
        if line.strip() != '':
            scanned_Program.append(line.strip())
    return scanned_Program


def remove_Comments(program):
    # Remove /* ... */ block comments first, then // line comments.
    program = re.sub(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/", "", program)
    program = re.sub(r"//.*", "", program)
    return program


with open('input.txt', 'r') as f:
    program = f.read()

program_Comments_removed = remove_Comments(program)
Source_Code = remove_Spaces(program_Comments_removed.split('\n'))

for line in Source_Code:
    # #include directives are tokenized with the standard word tokenizer
    # (which needs nltk's 'punkt' data); every other line is also split
    # on punctuation.
    if line.startswith("#include"):
        tokens = nltk.word_tokenize(line)
    else:
        tokens = nltk.wordpunct_tokenize(line)
    for token in tokens:
        # Match whole tokens only, so an identifier such as "integer" is not
        # reported as the keyword "int".
        if re.fullmatch(RE_Keywords, token):
            Keywords_Output.append(token)
        elif re.findall(RE_Constant, token):
            Constant_Output.append(token)

print("There Are ", len(Constant_Output), "Constant:", Constant_Output)
print("\n")