-
Notifications
You must be signed in to change notification settings - Fork 0
/
scrape1.rb
67 lines (67 loc) · 2.08 KB
/
scrape1.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
require 'metainspector'
# This script writes a product link to a text file for each free product listed on KVR (kvraudio.com).
# Announce the stage without a trailing newline so later output continues on the same line.
$stdout.print 'Stage one'
# Collects product links from a KVR search-result page and stores them,
# one URL per line, in a text file.
class First
  # Default output file: the combined list of product links.
  OUTPUT_FILE = 'alllinks.txt'

  # A link matching any of these patterns is discarded even if it also
  # contains "product".
  EXCLUDED_LINKS = [
    %r{\d*/reviews}, # review link
    %r{reviews/\d*}, # review link (alternate form)
    /developer/      # developer link
  ].freeze

  # Loads +query+ through MetaInspector, keeps the links that point at
  # products and writes them to +filename+, one per line.
  #
  # The first call made on an instance for a given file starts that file from
  # scratch; later calls on the same instance append to it. This lets several
  # result pages be collected into one list instead of each page overwriting
  # the previous one.
  #
  # @param query [String] URL of the search-result page to load
  # @param filename [String] path of the file that receives the links
  # @return [nil]
  def kvrquery(query, filename = OUTPUT_FILE)
    kvr = MetaInspector.new(query) # crawl the query page
    # NOTE(review): assumes kvr.links is an Array of URL strings - confirm
    # against the installed metainspector version.
    write_links(filename, product_links(kvr.links))
    nil
  end

  private

  # Returns the links that contain "product" and match no excluded pattern.
  # Builds a new array rather than deleting from +links+ during iteration,
  # which would skip the element following every deleted one.
  def product_links(links)
    links.select do |link|
      link =~ /product/ && EXCLUDED_LINKS.none? { |pattern| link =~ pattern }
    end
  end

  # Writes +links+ to +filename+, truncating it on this instance's first write
  # to that file and appending afterwards. The block form of File.open closes
  # the file even if a write raises.
  def write_links(filename, links)
    @started_files ||= {}
    mode = @started_files[filename] ? 'a' : 'w'
    File.open(filename, mode) do |file|
      links.each { |link| file.print "#{link}\n" }
    end
    @started_files[filename] = true
  end
end
print '.'
Scraper = First.new

# Search URL for the first result page; every later page is the same URL with
# a "&start=<offset>" suffix. (lm[]=500 presumably sets the page size, which
# would explain the 500-step offsets - confirm against the site.)
base_query = 'http://www.kvraudio.com/q.php?search=1&pr[]=f&av[]=re&sh[]=s&ob[]=dan&lm[]=500&bl[]=t'

# One progress dot is printed before each page is scraped.
(0..2500).step(500) do |offset|
  query = offset.zero? ? base_query : "#{base_query}&start=#{offset}"
  print '.'
  Scraper.kvrquery(query)
end

puts 'done.'