-
Notifications
You must be signed in to change notification settings - Fork 0
/
findOnPage.java
107 lines (101 loc) · 3.57 KB
/
findOnPage.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package picnic;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.regex.Pattern;
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
/**
 * Worker thread that downloads one page and scans the text inside its
 * {@code <p ...>} / {@code <h? ...>} elements for the search phrases
 * configured in {@code Picnic}.
 *
 * <p>Creating an instance increments {@code Picnic.konec}; {@link #run()}
 * decrements it exactly once when it finishes, whatever the outcome.
 * NOTE(review): {@code konec} looks like a "workers still alive" counter that
 * another thread polls - confirm against {@code Picnic}.
 *
 * <p>NOTE(review): the shared {@code Picnic} fields are read and modified here
 * without any synchronization visible in this file; verify how {@code Picnic}
 * declares them before running many workers concurrently.
 *
 * @author Rew1L
 */
public class findOnPage extends Thread
{
    /**
     * Matches a (lower-cased) line that ends with the closing {@code >} of an
     * opening {@code <p ...>} tag, or of a tag that starts with {@code <h},
     * at most one further character and a space (e.g. {@code <h1 ...>}).
     * Compiled once because it is applied to every line of every page.
     */
    private static final Pattern CONTENT_TAG = Pattern.compile(".*(<p .*>|<h.? .*>)");

    /** Page number passed to {@code help.checkLink}; one more than the previous page. */
    int page;

    /**
     * Registers a new worker: advances {@code Picnic.indexOfNextPage},
     * increments {@code Picnic.konec} and stores {@code prevPage + 1} as this
     * worker's page number.
     *
     * @param prevPage page number of the page that led to this one
     */
    public findOnPage(int prevPage)
    {
        Picnic.indexOfNextPage++;
        this.page = prevPage+1;
        Picnic.konec++;
    }

    /**
     * Opens the URL stored at {@code Picnic.urls.get(Picnic.numUrls)}, reads
     * it as UTF-8 and scans it line by line.
     *
     * <p>If the URL cannot be built or opened the worker ends silently.
     * Any other failure is printed to standard output. In every case the
     * stream is closed and {@code Picnic.konec} is decremented exactly once.
     */
    @Override
    public void run()
    {
        try
        {
            String address;
            InputStream is;
            try
            {
                URL newUrl = new URL(Picnic.urls.get(Picnic.numUrls)); //select last link that we found
                address = newUrl.toString();
                is = newUrl.openConnection().getInputStream();
            }
            catch(Exception ignored)
            {
                // Best effort: an unreachable or malformed link is simply skipped.
                // The counter is NOT touched here - the finally block below is the
                // single place that decrements it (it also runs on this return).
                return;
            }

            // try-with-resources closes the reader and the stream even when scanning fails.
            try (InputStream stream = is;
                 BufferedReader rd = new BufferedReader(new InputStreamReader(stream, "UTF-8")))
            {
                scanPage(rd, address);
            }
        }
        catch(Exception ex)
        {
            System.out.println(ex.toString()+" in findOnPage.java");
        }
        finally
        {
            Picnic.konec--;
        }
    }

    /**
     * Reads the page line by line, passing every lower-cased line to
     * {@code help.checkLink} and searching the lines of each paragraph/heading
     * block for the configured phrases.
     *
     * @param rd      reader positioned at the start of the page
     * @param address textual form of the page URL, used as the key in {@code Picnic.urlsFound}
     * @throws IOException if reading from {@code rd} fails
     */
    private void scanPage(BufferedReader rd, String address) throws IOException
    {
        int lineNum = 0; //count lines
        String line;
        while((line = rd.readLine()) != null)
        {
            line = line.toLowerCase(); //for better compare
            lineNum++;
            help.checkLink(line, this.page);
            if(!CONTENT_TAG.matcher(line).matches())
            {
                continue;
            }

            // Inside a paragraph/heading: keep consuming lines until the closing tag
            // or the end of the stream.
            while(line != null)
            {
                // NOTE(review): the opening line was already passed to checkLink above,
                // so it is checked twice; kept as in the original - confirm whether
                // checkLink tolerates duplicates.
                help.checkLink(line, this.page);
                recordMatches(line, lineNum, address);
                if(line.contains("</p>") || line.contains("</h")) //exit if tag finished
                {
                    break;
                }
                line = rd.readLine();
                if(line != null) // end of stream inside an unclosed tag must not dereference null
                {
                    lineNum++;
                    line = line.toLowerCase();
                }
            }
        }
    }

    /**
     * Tests one line against every phrase {@code help.getLineOfItems(i, g)}
     * with {@code i < g} and updates the shared result lists for each hit.
     *
     * <p>The first hit for a URL appends it to {@code Picnic.urlsFound},
     * stores the line number and an initial score of 1, and prints the link
     * and line. Every later hit adds {@code g - i} to the stored score.
     * NOTE(review): {@code getLineOfItems(i, g)} presumably joins search items
     * i..g into one phrase, making {@code g - i} a phrase-length weight - confirm.
     *
     * @param line    lower-cased line to search
     * @param lineNum 1-based number of {@code line} within the page
     * @param address textual form of the page URL
     */
    private void recordMatches(String line, int lineNum, String address)
    {
        int items = Picnic.whatNeedToFind.length;
        for(int i = 0; i < items; i++)
        {
            for(int g = i+1; g < items; g++)
            {
                if(!line.contains(help.getLineOfItems(i, g)))
                {
                    continue;
                }
                int known = Picnic.urlsFound.indexOf(address);
                if(known >= 0)
                {
                    Picnic.urlIndex.set(known, Picnic.urlIndex.get(known)+(g-i));
                }
                else
                {
                    Picnic.urlsFound.add(address);
                    int added = Picnic.urlsFound.indexOf(address);
                    Picnic.lines.add(added, lineNum);
                    Picnic.urlIndex.add(added, 1);
                    System.out.println("\r\nСсылка: "+address);
                    System.out.println("Строка: "+lineNum);
                }
            }
        }
    }
}