This repository has been archived by the owner on Jul 17, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.rb
76 lines (58 loc) · 1.76 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
require 'mechanize'
require 'mongo'
# Scrapes the scripting-language comparison table from hyperpolyglot.org and
# loads it into the TRANSLATOR MongoDB database on 127.0.0.1:27017:
#   * one `languages` document per linked language name in the header row,
#   * one `features` document per data row, titled from the row's first cell,
#   * one `snippets` document per remaining cell, referencing the row's
#     feature and the language of the cell's column.
db = Mongo::Client.new(['127.0.0.1:27017'], database: 'TRANSLATOR', connect: :direct)
agent = Mechanize.new
page = agent.get('http://hyperpolyglot.org/scripting')

table = page.search('.wiki-content-table').first
# Fail with a readable message instead of a NoMethodError on nil further down.
abort 'No .wiki-content-table element found on the page; nothing to import.' if table.nil?

# Column index in the table => _id of the language document for that column.
# Keyed by column (rather than appended to a list) so that a header cell that
# gets skipped cannot shift every following language onto the wrong column.
language_ids = {}

table.search('tr').each do |row|
  header_cells = row.search('th')

  # Ignore rows made of a single header cell, and any header row that shows
  # up after the languages have already been collected.
  next if header_cells.length == 1
  next if !header_cells.empty? && !language_ids.empty?

  # First multi-cell header row: register the languages.
  unless header_cells.empty?
    header_cells.each_with_index do |cell, column|
      # Skip blank cells and cells that carry no link to take the name from.
      next if cell.text.empty?

      link = cell.search('a').first
      next if link.nil?

      name = link.text
      # TODO: check whether a language with this commonName is already stored
      # and reuse its _id instead of inserting a duplicate on every run.
      result = db[:languages].insert_one({
        officialName: name,
        commonName: name
      })
      language_ids[column] = result.inserted_id
    end
    next
  end

  # Data row: the first cell names the feature, every other cell is a snippet.
  feature_id = nil
  row.search('td').each_with_index do |cell, column|
    if column.zero?
      # The title is read from the second anchor of the cell.
      # NOTE(review): presumably the first anchor is only a named target -
      # confirm against the page markup. Fall back to the cell text so a row
      # with fewer than two anchors no longer aborts the whole import.
      title_link = cell.search('a')[1]
      title = title_link ? title_link.text : cell.text.strip

      result = db[:features].insert_one({
        title: title,
        importance: 0,
        description: '',
        tags: []
      })
      feature_id = result.inserted_id
      next
    end

    db[:snippets].insert_one({
      feature: feature_id,
      language: language_ids[column], # nil when the column has no known language
      code: cell.text,
      tags: []
    })
  end
end