-
Notifications
You must be signed in to change notification settings - Fork 1
/
Scraper.php
executable file
·138 lines (95 loc) · 2.73 KB
/
Scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
<?php
/**
 * Scrapes the University of Queensland (uq.edu.au) course pages to collect the
 * assessment items of a single course.
 *
 * The scrape runs entirely inside the constructor, in three steps:
 *   1. searchCourse()   - search for the course and pick the first "offer" link.
 *   2. getProfileLink() - open that offering page and read the course-profile link.
 *   3. getProfile()     - open the profile and extract heading/weight pairs.
 *
 * Every step returns false on failure, and so does getAssessment() afterwards.
 *
 * HTML parsing relies on the simple_html_dom library (file_get_html(),
 * str_get_html(), ->find(), ->clear()), loaded by the constructor from
 * includes/simple_html_dom.php.
 */
class Scraper
{
    /**
     * Course name/code exactly as passed to the constructor.
     *
     * @var string
     */
    public $courseName;

    /**
     * List of array(heading, weight) pairs, or false when scraping failed.
     *
     * @var array|false
     */
    public $assessmentItems;

    /**
     * Runs the whole scrape for the given course and stores the outcome in
     * $assessmentItems (false when any step fails).
     *
     * @param string $courseName Keyword(s) sent to the course search.
     */
    public function __construct($courseName)
    {
        $this->courseName = $courseName;
        // Pessimistic default: only overwritten when every step succeeds.
        $this->assessmentItems = false;

        include_once('includes/simple_html_dom.php');

        $link = $this->searchCourse($courseName);
        if (!$link) {
            return;
        }

        $profileLink = $this->getProfileLink($link);
        if (!$profileLink) {
            return;
        }

        $this->assessmentItems = $this->getProfile($profileLink);
    }

    /**
     * Searches the UQ course catalogue and returns the first result link whose
     * href contains "offer".
     *
     * NOTE: the semester filter in the query string is hard-coded to 2012:2.
     *
     * @param string $courseName Search keywords; URL-encoded before use.
     *
     * @return string|false Absolute URL of the course offering, or false when
     *                      the page cannot be loaded or no such link exists.
     */
    public function searchCourse($courseName)
    {
        // urlencode() keeps spaces, '&', '#', ... in the keywords from
        // corrupting the query string.
        $url = 'http://www.uq.edu.au/study/search.html?keywords=' . urlencode($courseName)
            . '&searchType=all&archived=false&CourseParameters%5Bsemester%5D=2012:2';

        $dom = file_get_html($url);
        if (!$dom) {
            return false;
        }

        $offerUrl = false;
        $container = $dom->find('div#courses-container', 0);
        if ($container) {
            foreach ($container->find('a') as $anchor) {
                $href = $anchor->href;
                if (is_string($href) && strpos($href, 'offer') !== false) {
                    $offerUrl = 'http://www.uq.edu.au' . $href;
                    break;
                }
            }
        }

        // Release the root DOM (not just a child node) once the value we need
        // has been copied out of it.
        $dom->clear();
        unset($dom);

        return $offerUrl;
    }

    /**
     * Opens a course offering page and extracts the href of the
     * "profile available" anchor.
     *
     * The anchor is looked up as: div#description -> first table.offerings ->
     * td at index 7 -> first a.profile-available. Any missing element yields
     * false instead of a fatal "member function on null" error.
     *
     * @param string $link URL of the course offering page.
     *
     * @return string|false The anchor's href, or false when the page cannot
     *                      be loaded or the anchor is not found.
     */
    public function getProfileLink($link)
    {
        $dom = file_get_html($link);
        if (!$dom) {
            return false;
        }

        $profileLink = false;

        // Walk the selector chain one step at a time so a missing element
        // stops the walk rather than crashing.
        $node = $dom->find('div#description', 0);
        if ($node) {
            $node = $node->find('table.offerings', 0);
        }
        if ($node) {
            $node = $node->find('td', 7);
        }
        if ($node) {
            $node = $node->find('a.profile-available', 0);
        }
        if ($node) {
            $profileLink = $node->href;
        }

        $dom->clear();
        unset($dom);

        return $profileLink;
    }

    /**
     * Rewrites a course-profile link so it points at section 5 instead of
     * section 1, and decodes HTML-escaped ampersands so the result is usable
     * as a plain URL.
     *
     * @param string $link Profile link as read from an href attribute.
     *
     * @return string The adjusted link.
     */
    public function changeLinkSection($link)
    {
        $link = str_replace('section=1', 'section=5', $link);
        // The href is taken from HTML markup, where '&' is written as the
        // entity '&amp;'; turn it back into a literal ampersand.
        $link = str_replace('&amp;', '&', $link);

        return $link;
    }

    /**
     * Downloads a course profile and extracts its assessment items.
     *
     * The markup after the first <a name="assessmentDetail"></a> marker inside
     * div#content is re-parsed; within its first div.infoIndent, every
     * div.assessmentheading provides a heading and every "Weight:" entry in a
     * div.assessDetail provides a weight. Headings and weights are paired by
     * position.
     *
     * @param string $link Profile link (passed through changeLinkSection()).
     *
     * @return array|false List of array(heading, weight) pairs, where heading
     *                     is the heading's inner HTML and weight is a float
     *                     (null when no weight exists at that position);
     *                     false when the page or an expected section is missing.
     */
    public function getProfile($link)
    {
        $dom = file_get_html($this->changeLinkSection($link));
        if (!$dom) {
            return false;
        }

        // Serialise the content container before releasing the DOM.
        $content = $dom->find('div#content', 0);
        $contentHtml = $content ? (string) $content : '';
        $dom->clear();
        unset($dom);

        // explode() replaces split(), which was removed in PHP 7.0; the
        // delimiter contains no regex metacharacters, so the result is the same.
        $parts = explode('<a name="assessmentDetail"></a>', $contentHtml);
        if (count($parts) < 2) {
            return false;
        }

        $detailDom = str_get_html($parts[1]);
        if (!$detailDom) {
            return false;
        }

        $items = false;
        $section = $detailDom->find('div.infoIndent', 0);
        if ($section) {
            $headings = array();
            foreach ($section->find('div.assessmentheading') as $headingNode) {
                $headings[] = $headingNode->innertext;
            }

            $weights = array();
            foreach ($section->find('div.assessDetail') as $detailNode) {
                foreach ($this->parseWeights($detailNode->innertext) as $weight) {
                    $weights[] = $weight;
                }
            }

            $items = array();
            foreach ($headings as $index => $heading) {
                // Pairing is purely positional; guard against fewer weights
                // than headings instead of reading an undefined offset.
                $items[] = array($heading, isset($weights[$index]) ? $weights[$index] : null);
            }
        }

        $detailDom->clear();
        unset($detailDom);

        return $items;
    }

    /**
     * Extracts every "Weight:" value from the inner HTML of one assessment
     * detail block.
     *
     * The HTML is cut at each "<strong>"; a piece that starts with "Weight:"
     * (case-insensitive) is trimmed at the first "<br /> " and the text after
     * "</strong>" is converted with floatval().
     *
     * @param string $innerHtml Inner HTML of a div.assessDetail element.
     *
     * @return array List of floats, one per "Weight:" entry found (0.0 when
     *               the entry has no closing "</strong>").
     */
    private function parseWeights($innerHtml)
    {
        $weights = array();

        foreach (explode('<strong>', $innerHtml) as $segment) {
            if (stripos($segment, 'Weight:') !== 0) {
                continue;
            }

            $beforeBreak = explode('<br /> ', $segment);
            $labelAndValue = explode('</strong>', $beforeBreak[0]);
            $weights[] = isset($labelAndValue[1]) ? floatval($labelAndValue[1]) : 0.0;
        }

        return $weights;
    }

    /**
     * Returns the scraped assessment items.
     *
     * @return array|false List of array(heading, weight) pairs, or false when
     *                     scraping failed.
     */
    public function getAssessment()
    {
        return $this->assessmentItems;
    }
}
?>