{"id":1657,"date":"2017-02-17T02:53:34","date_gmt":"2017-02-16T17:53:34","guid":{"rendered":"http:\/\/normalblog.net\/system\/?p=1657"},"modified":"2017-02-25T02:00:06","modified_gmt":"2017-02-24T17:00:06","slug":"ruby-mechanize","status":"publish","type":"post","link":"https:\/\/normalblog.net\/system\/ruby-mechanize\/","title":{"rendered":"Ruby Mechanize\u3092\u4f7f\u7528\u3057\u305f\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u30b5\u30f3\u30d7\u30eb\u30fb\u4f8b"},"content":{"rendered":"<p>Ruby Mechanize\u3092\u4f7f\u7528\u3057\u305f\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u30b5\u30f3\u30d7\u30eb\u30fb\u4f8b\u3092\u8a18\u8f09\u3057\u307e\u3059\u3002<\/p>\n<p>\u4e00\u62ec\u3067\u306e\u753b\u50cf\u53ce\u96c6\u3082\u51fa\u6765\u307e\u3057\u305f\u3002<\/p>\n<h2>img src \u306e jpg png \u3092 img\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u4fdd\u5b58\u3059\u308b<\/h2>\n<pre># -*- coding: utf-8 -*-\r\n\r\nrequire 'mechanize'\r\n\r\nmodule Crawler\r\n class Normalblog\r\n  def saveImage\r\n   agent = Mechanize.new\r\n   agent.user_agent = 'Windows Mozilla'\r\n   page = agent.get('http:\/\/normalblog.net\/system')\r\n\r\n   page.images_with(:src =&gt; \/(jpg)|(png)\/).each do |img|\r\n    sleep(5)\r\n    img_str = img.to_s\r\n    m = img_str.match(\/([^\\\/]+jpg)|([^\\\/]+png)\/)\r\n    img.fetch.save_as(\".\/img\/\" + m[0])\r\n   end\r\n  end\r\n end\r\nend\r\n\r\ncrawler = Crawler::Normalblog.new\r\ncrawler.saveImage<\/pre>\n<h2>img src \u306e\u30d1\u30b9\u3092\u8868\u793a\u3059\u308b<\/h2>\n<pre># -*- coding: utf-8 -*-\r\n\r\nrequire 'mechanize'\r\n\r\nmodule Crawler\r\n class Normalblog\r\n  def dispImage\r\n   agent = Mechanize.new\r\n   agent.user_agent = 'Windows Mozilla'\r\n   page = agent.get('http:\/\/normalblog.net\/system')\r\n\r\n \u00a0 page.search('img').each do |img|\r\n    sleep(5)\r\n    src = img.get_attribute(:src)\r\n    puts src\r\n   end\r\n  end\r\n end\r\nend\r\n\r\ncrawler = Crawler::Normalblog.new\r\ncrawler.dispImage<\/pre>\n<h2>\u30ed\u30b0\u30a4\u30f3\u3092\u3057\u3066\u304b\u3089\u8a72\u5f53\u30af\u30e9\u30b9\u306e\u753b\u50cf\u3092\u4e00\u62ec\u4fdd\u5b58<\/h2>\n<pre># -*- coding: utf-8 -*-\r\n\r\nrequire 'mechanize'\r\n\r\nmodule Crawler\r\n class testSite\r\n  def login\r\n   agent = Mechanize.new\r\n   agent.verify_mode = OpenSSL::SSL::VERIFY_NONE\r\n   agent.user_agent = 'Windows Mozilla'\r\n\r\n   page = agent.get('https:\/\/testtest.com')\r\n   response = page.form_with(:action =&gt; '\/') do |form|\r\n    formdata = {\r\n     :mail =&gt; '\u30e1\u30fc\u30eb',  # mail\u306fname\r\n     :password =&gt; '\u30d1\u30b9', # password\u306fname\r\n    }\r\n    form.field_with(:name =&gt; 'mailaddress').value = formdata[:mail]\r\n    form.field_with(:name =&gt; 'passwd').value = formdata[:password]\r\n   end.submit\r\n\r\n   agent.get('https:\/\/testtest.com') do |page|\r\n    # \u8a72\u5f53\u306e\u30af\u30e9\u30b9\u3092\u6307\u5b9a\r\n    page.search('.test_class').each do |item|\r\n     item.images_with(:src =&gt; \/(JPG)|(PNG)\/).each do |img|\r\n      sleep(3)\r\n      img_str = img.to_s\r\n      m = img_str.match(\/([^\\\/]+JPG)|([^\\\/]+PNG)\/)\r\n      img.fetch.save_as(\".\/img\/\" + m[0])\r\n     end\r\n    end\r\n   end\r\n  end\r\n end\r\nend\r\n\r\ncrawler = Crawler::testSite.new\r\ncrawler.login<\/pre>\n<h2>img src \u306e jpg png \u3092 img\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u4fdd\u5b58\u3059\u308b\u304c\u30ea\u30f3\u30af\u5207\u308c\u306a\u3069\u3067\u5b58\u5728\u3057\u306a\u3044\u753b\u50cf\u306f\u98db\u3070\u3059<\/h2>\n<pre># -*- coding: utf-8 -*-\r\n\r\nrequire 'mechanize'\r\n\r\nmodule Crawler\r\n class Test\r\n  def saveImage\r\n   agent = Mechanize.new\r\n   agent.user_agent = 'Windows Mozilla'\r\n   page = agent.get('http:\/\/testtest.com')\r\n\r\n   page.images_with(:src =&gt; \/(jpg)|(png)\/).each do |img|\r\n    begin\r\n     sleep(10)\r\n     img_str = img.to_s\r\n     m = img_str.match(\/([^\\\/]+jpg)|([^\\\/]+png)\/)\r\n     img.fetch.save_as(\".\/img\/\" + m[0])\r\n    rescue Mechanize::ResponseCodeError =&gt; e\r\n     puts 'ResponseCodeError!! ' + img_str\r\n    end\r\n   end\r\n  end\r\n end\r\nend\r\n\r\ncrawler = Crawler::Test.new\r\ncrawler.saveImage<\/pre>\n<h2>images_with<\/h2>\n<p>class\u540d\u3092\u6307\u5b9a\u3059\u308b\u5834\u5408<\/p>\n<pre>page.images_with(:class =&gt; \"test_class\")<\/pre>\n<p>id\u3068class\u3092\u5408\u308f\u305b\u3066\u6307\u5b9a\u3059\u308b\u5834\u5408<\/p>\n<pre>page.images_with(:id =&gt; \/test_id\/, :class =&gt; \"test_class\")<\/pre>\n<h2>\u6ce8\u610f\u70b9<\/h2>\n<p>\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u5148\u306b\u8ca0\u8377\u304c\u304b\u304b\u308b\u306e\u3067sleep\u3092\u5165\u308c\u307e\u3057\u305f\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Ruby Mechanize\u3092\u4f7f\u7528\u3057\u305f\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u306e\u30b5\u30f3\u30d7\u30eb\u30fb\u4f8b\u3092\u8a18\u8f09\u3057\u307e\u3059\u3002 \u4e00\u62ec\u3067\u306e\u753b\u50cf\u53ce\u96c6\u3082\u51fa\u6765\u307e\u3057\u305f\u3002 img src \u306e jpg png \u3092 img\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u4fdd\u5b58\u3059\u308b # -*- coding: utf-&hellip;<\/p>\n","protected":false},"author":1,"featured_media":1255,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"om_disable_all_campaigns":false,"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"footnotes":""},"categories":[64],"tags":[],"class_list":["post-1657","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-ruby"],"aioseo_notices":[],"_links":{"self":[{"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/posts\/1657","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/comments?post=1657"}],"version-history":[{"count":11,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/posts\/1657\/revisions"}],"predecessor-version":[{"id":1688,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/posts\/1657\/revisions\/1688"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/media\/1255"}],"wp:attachment":[{"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/media?parent=1657"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/categories?post=1657"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/normalblog.net\/system\/wp-json\/wp\/v2\/tags?post=1657"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}