Hi, I created a Mechanize action that fetches raw HTML pages into a folder, and then parsed those pages with Nokogiri to store the results in a database. The actions below show how I did it. Please tell me how to use/connect this database to my Rails app and display the required results: I want to open a page, search for something, submit the search, and display the matching records. Below are the actions:
# parsing action:
require 'rubygems'
require 'nokogiri'
require 'sqlite3'

# Parses every saved HTML page under data-hold/pages, writing each table row
# both to a tab-separated text file and to a freshly created SQLite database.

# Column definitions for the output table: [column_name, sqlite_type].
# An optional third element, when present, is used as the name of an index
# created on that column (none of the current entries define one).
FIELD_NAMES = [
  ['selectcity', 'VARCHAR'],
  ['match', 'VARCHAR'],
  ['phone_no', 'NUMERIC'],
  ['name', 'VARCHAR'],
  ['address', 'VARCHAR']
].freeze

# CSS selector of the element whose <tr> rows hold the search results.
TABLE_DIV_ID = "#dgrSearch"

# Tab-separated dump of every parsed row; the first line is the column header.
OFILE = File.open('data-hold/tel-directory.txt', 'w')
OFILE.puts(FIELD_NAMES.map { |f| f[0] }.join("\t"))

# The database is rebuilt from scratch on every run.
DBNAME = "data-hold/tel-directory.sqlite"
# File.exist? is used because File.exists? was removed in Ruby 3.2.
File.delete(DBNAME) if File.exist?(DBNAME)
DB = SQLite3::Database.new(DBNAME)

TABLE_NAME = "telephone_records"
# One positional placeholder per column, bound from the row array on insert.
DB_INSERT_STATEMENT = "INSERT into #{TABLE_NAME} values (#{FIELD_NAMES.map { '?' }.join(',')})"

DB.execute "CREATE TABLE #{TABLE_NAME}(#{FIELD_NAMES.map { |f| "`#{f[0]}` #{f[1]}" }.join(', ')});"
FIELD_NAMES.each do |fn|
  next if fn[2].nil?

  DB.execute "CREATE INDEX #{fn[2]} ON #{TABLE_NAME}(#{fn[0]})"
end

begin
  Dir.glob("data-hold/pages/*.html").each do |fname|
    # The file name (minus ".html") is split on "--"; its parts are prepended
    # to every row parsed from that page.
    meta_info = File.basename(fname, '.html').split('--')

    # Block-form File.open closes the handle and, unlike Kernel#open, never
    # treats a path beginning with "|" as a command to run.
    page = File.open(fname) { |f| Nokogiri::HTML(f) }

    page.css("#{TABLE_DIV_ID} tr").each do |tr|
      # Drops "$"/"," that directly precede a digit, then removes every
      # whitespace character and non-breaking space.
      # NOTE(review): this also removes spaces inside names and addresses --
      # confirm that is intended.
      data_tds = tr.css('td').map do |td|
        td.text.gsub(/[$,](?=\d)/, '').gsub(/\302\240|\s/, '').strip
      end

      # Rows without any <td> (e.g. header rows) carry no record data; skip
      # them instead of storing a row that holds only the file-name metadata.
      next if data_tds.empty?

      data_row = meta_info + data_tds
      OFILE.puts(data_row.join("\t"))
      DB.execute(DB_INSERT_STATEMENT, data_row)
    end
  end
ensure
  # Close (and flush) the text dump even if parsing or an insert fails.
  OFILE.close
end
# mechanize action:
require 'rubygems'
require 'mechanize'
require 'fileutils'

# Directory that receives one saved HTML page per form submission.
DIR = 'data-hold/pages'
FileUtils.makedirs(DIR)

# Submits the area-wise directory search form once for every combination of
# the two dropdown options and saves each resulting page to DIR.
class GoogleController < ApplicationController
  # Seconds to pause before re-submitting a failed form.
  # NOTE(review): the original logged "Sleeping..." without pausing; this
  # duration is an assumption -- adjust as needed.
  RETRY_DELAY_SECONDS = 1

  # Walks every option of the first dropdown, then every option of the second,
  # submitting the form each time and writing the response HTML to
  # "<DIR>/<first option value>--<second option value>.html".
  def index
    # NOTE(review): the URL embeds a "(S(...))" segment that looks like a
    # session token -- confirm it remains valid over time.
    home_url = "http://www.chhattisgarh.bsnl.co.in/(S(jinhnz45memp4b2mtgdgk3ab))/directory_services/AreaWiseSearch.aspx?Area=04"

    # Logical name => name attribute of the form's select element.
    select_field_names = { 'selectcity' => 'DropDownList2', 'match' => 'drpMatch' }
    city_field = select_field_names['selectcity']
    match_field = select_field_names['match']

    agent = Mechanize.new
    agent.get(home_url)
    form = agent.page.form_with(:action => /AreaWiseSearch\.aspx/)

    # options[1..-1] skips the first entry of each dropdown.
    form.field_with(:name => city_field).options[1..-1].each do |selc_opt|
      form[city_field] = selc_opt.value
      form = form_submit_w_exception_handling(form)
      puts "selectcity #{selc_opt.value}: #{agent.page.parser.css('tr').length}"

      form.field_with(:name => match_field).options[1..-1].each do |mat_opt|
        form[match_field] = mat_opt.value
        form = form_submit_w_exception_handling(form)
        puts "match #{mat_opt.value}: #{agent.page.parser.css('tr').length}"

        fname = "#{DIR}/#{selc_opt.value}--#{mat_opt.value}.html"
        File.open(fname, 'w') { |f| f.puts agent.page.parser.to_html }
      end
    end
  end

  private

  # Submits +frm+ through its "Search" button, retrying up to three more times
  # when the submission raises.
  #
  # @param frm the form to submit (must respond to #submit and #button_with)
  # @return the same form object that was passed in
  # @raise [RuntimeError] when the submission still fails after all retries
  def form_submit_w_exception_handling(frm)
    retries = 3
    begin
      frm.submit(frm.button_with(:value => 'Search'))
    rescue StandardError => e
      # StandardError (not Exception) so interrupts and exit requests still
      # propagate.
      puts "Problem: #{e}"
      raise "Unexpected, repeated errors. Shutting down" if retries <= 0

      retries -= 1
      puts "Sleeping...#{retries} left"
      sleep RETRY_DELAY_SECONDS
      retry
    end
    frm
  end
end