-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathleatherscraper.js
109 lines (89 loc) · 4.09 KB
/
leatherscraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// CasperJS instance that drives every navigation step in this script.
var casper = require('casper').create();
// XPath selector helper; not referenced by the live code in this file.
var x = require('casper').selectXPath;
// Declared but never assigned or read by the live code in this file.
var last;
var names;
// Initial letters passed as the `q` parameter of the company search, one request per letter.
// (The earlier empty `var links = [];` was redundant: it was replaced by this list before any use.)
var links = ["A" , "B" , "C" , "Ç" , "D" , "E" , "F" , "G" , "H" , "I" , "İ" , "J" , "K" , "L" , "M" , "N" , "O" , "Ö" , "P" , "R" , "S" , "Ş" , "T" , "U" , "Ü" , "V" , "Y" , "Z" ];
// Single-letter list; not referenced by the live code (presumably a short list for trial runs - confirm before removing).
var temp = ["A"];
// File-system module; the script uses fs.write(path, content, mode) to append rows to s.csv.
var fs = require('fs');
// Utilities module; not referenced by the live code in this file.
var utils = require('utils');
// One entry per visited letter page: the array of hrefs collected on that page.
var linkall = [];
// Flattened list of every collected href, built after all letter pages have been visited.
var all = [];
/**
 * Collects the `href` attribute of every anchor matching `.right li a`.
 *
 * Reads the global `document`, so it is meant to be executed where a DOM is
 * available (the script passes it to `casper.evaluate`).
 *
 * @returns {Array} One entry per matched anchor, in document order; an entry
 *     is whatever `getAttribute('href')` returns for that anchor.
 */
function getLinks() {
    var anchors = document.querySelectorAll('.right li a');
    var hrefs = [];
    for (var idx = 0; idx < anchors.length; idx += 1) {
        hrefs.push(anchors[idx].getAttribute('href'));
    }
    return hrefs;
}
// Start the CasperJS run without an initial URL; every page is opened by a step queued below.
casper.start();
// Queue one search-results page per initial letter and collect the company links found on each.
// The second callback argument is the array item itself (the letter), not an index.
casper.each(links, function(self, letter) {
    casper.thenOpen('http://www.turkishleatherbrands.com/company_search.php?s=letter&q='+letter, function() {
        // Keep the scraped hrefs in a local variable: the previous code assigned them to the
        // global `links`, silently replacing the letter list with the last page's results.
        var pageLinks = this.evaluate(getLinks);
        if (!pageLinks) {
            // A falsy result means the in-page function produced nothing usable (for example the
            // page did not load). Pushing it would later turn into a request for ".../null".
            this.echo('No links collected for letter ' + letter);
            return;
        }
        linkall.push(pageLinks);
        // Progress indicator: number of letter pages collected so far.
        this.echo(linkall.length);
    });
});
// casper.start('http://www.cfainstitute.org/community/membership/directory/pages/results.aspx', function() {
// // search for 'casperjs' from google form
// // this.fill('form[action="/search/"]', { q: 'book' }, true);
// });
/**
 * Converts the inner HTML of a table cell into the text written to s.csv.
 *
 * Steps, in order: the first space becomes "|", the first "<br>" is removed,
 * every run of whitespace collapses to a single space, and then the first
 * remaining space becomes "|". String (non-regex) patterns replace only the
 * first occurrence; that matches the original transformation and is kept as is.
 *
 * @param {string} html Inner HTML of the cell.
 * @returns {string} The cleaned cell text.
 */
function cleanCellHtml(html) {
    return html
        .replace(" ", "|")
        .replace("<br>", "")
        .replace(/\s+/g, ' ')
        .replace(" ", "|");
}

// Runs after all letter pages have been visited: flattens the collected links, writes the
// CSV header, then queues one step per company page that appends a row to s.csv.
casper.then(function() {
    var rowCellSelector = 'table tr:nth-child(4) td:nth-child(4)';
    var anyCellSelector = 'table tr td:nth-child(4)';

    // Flatten the per-letter arrays and drop entries that cannot form a URL
    // (getAttribute('href') yields null for anchors without an href).
    all = Array.prototype.concat.apply([], linkall).filter(function(href) {
        return typeof href === 'string' && href !== '';
    });

    // The header must end with a newline; without it the first record was appended
    // onto the header line. Mode 'a' is kept, so the file still accumulates across runs.
    fs.write("s.csv", "mo|email |website| phone1 |phone2\n", 'a');

    // The second callback argument is the array item itself (the relative link), not an index.
    casper.each(all, function(self, href) {
        casper.thenOpen('http://www.turkishleatherbrands.com/'+href, function() {
            // getHTML throws when its selector matches nothing, which would abort the whole
            // run on a single page with an unexpected layout; skip such pages instead.
            if (!this.exists(rowCellSelector) || !this.exists(anyCellSelector)) {
                this.echo('Skipping ' + href + ': expected table cells not found');
                return;
            }
            var k = cleanCellHtml(this.getHTML(rowCellSelector));
            // The second value additionally has its first remaining space removed.
            var l = cleanCellHtml(this.getHTML(anyCellSelector)).replace(" ", '');
            fs.write("s.csv", k + '|' + l + '\n', 'a');
        });
    });
});
// casper.then(function() {
// // aggregate results for the 'casperjs' search
// // this.fill('formQ[action="/search/"]', { q: 'pen' }, true);
// links = this.evaluate(getLinks);
// this.echo(links);
// });
// casper.then(function() {
// // aggregate results for the 'phantomjs' search
// links = links.concat(this.evaluate(getLinks));
// });
// casper.then(function() {
// // aggregate results for the 'phantomjs' search
// this.echo(links.length);
// this.echo(links);
// links = links.filter(function (e, i, links) {
// return links.lastIndexOf(e) === i;
// });
// fs.write("myfile.csv","name|address|awarded|postal|email\n ",'w');
// casper.each(links, function(self, i) {
// casper.thenOpen('http://www.cfainstitute.org/community/membership/directory/pages/results.aspx'+i, function() {
// var name = this.evaluate(function() {
// return document.querySelector('h3.fn').textContent;
// });
// var address = this.evaluate(function() {
// return document.querySelector('.adr').textContent;
// });
// var awarded = this.evaluate(function() {
// return document.querySelector('.vcard span strong').textContent;
// });
// var postal = this.evaluate(function() {
// return document.querySelector('span.postal-code').textContent;
// });
// var email = this.evaluate(function() {
// return document.querySelector('span.email').textContent;
// });
// if(name!=="")
// {var data = name + "|" + String(address) + "|" + String(awarded) + "|" + String(postal) + "|" + String(email) + "\n "
// fs.write("myfile.csv",data,'a');
// }
// });
// });
// });
casper.run();