-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdomain_match.rb
More file actions
34 lines (24 loc) · 904 Bytes
/
domain_match.rb
File metadata and controls
34 lines (24 loc) · 904 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Extracts the unique domain names mentioned in gfwlist_adult.html, writes them
# one per line to gfwlist_adult.json, and echoes each new domain plus a final
# count to stdout.
#
# NOTE(review): despite its .json extension the output is a plain
# newline-separated list, not JSON; the name is kept so existing consumers of
# the file keep working.

# File.readlines opens and closes the input itself, so no handle can leak.
file_data = File.readlines("gfwlist_adult.html").map(&:chomp)
print file_data, "\n\n\n"

# One host label followed by a recognised suffix. `com\.cn` is listed before
# `com`/`cn` so the longer suffix wins. There are no capture groups, so
# String#scan yields the matched strings themselves. Built once, not per line.
domain_pattern = /[\w][\w-]*\.(?:com\.cn|com|cn|co|net|org|gov|cc|biz|info)/

# Array#uniq keeps the first occurrence of each domain, so domains are reported
# in order of first appearance without a linear `include?` check per match.
all_exclude_domain = file_data.flat_map { |line| line.scan(domain_pattern) }.uniq

# Block form guarantees the output file is flushed and closed even on error.
File.open("gfwlist_adult.json", "w") do |output_file|
  all_exclude_domain.each do |domain|
    output_file.write("#{domain}\n")
    print "domain: ", domain, ",\n"
  end
end

print "total exclude domains: ", all_exclude_domain.length, "\n"