pax_global_header00006660000000000000000000000064144033763750014526gustar00rootroot0000000000000052 comment=a46158dbde7419818569e5bef6f5b9a2214ca0ea biola-Voight-Kampff-a46158d/000077500000000000000000000000001440337637500156125ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/.github/000077500000000000000000000000001440337637500171525ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/.github/workflows/000077500000000000000000000000001440337637500212075ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/.github/workflows/ci.yml000066400000000000000000000011021440337637500223170ustar00rootroot00000000000000--- name: CI on: push: branches: - '**' pull_request: branches: - '**' schedule: - cron: '0 4 1 * *' jobs: rspec: runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: ruby: - '3.2' - '3.1' - '3.0' - '2.7' steps: - name: Checkout uses: actions/checkout@v2 - name: Setup Ruby uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} bundler-cache: true - name: RSpec run: bundle exec rspec biola-Voight-Kampff-a46158d/.gitignore000066400000000000000000000000651440337637500176030ustar00rootroot00000000000000.DS_Store *~ Gemfile.lock .bundle *.rbc coverage doc biola-Voight-Kampff-a46158d/.ruby-version000066400000000000000000000000061440337637500202530ustar00rootroot000000000000003.2.0 biola-Voight-Kampff-a46158d/Gemfile000066400000000000000000000000471440337637500171060ustar00rootroot00000000000000source 'https://rubygems.org' gemspec biola-Voight-Kampff-a46158d/MIT-LICENSE000066400000000000000000000020471440337637500172510ustar00rootroot00000000000000Copyright (c) 2013 by Biola University Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. biola-Voight-Kampff-a46158d/README.md000066400000000000000000000112531440337637500170730ustar00rootroot00000000000000Voight-Kampff ============= [![Build Status](https://travis-ci.org/biola/Voight-Kampff.svg?branch=master)](https://travis-ci.org/biola/Voight-Kampff) [![Code Climate](https://codeclimate.com/github/biola/Voight-Kampff/badges/gpa.svg)](https://codeclimate.com/github/biola/Voight-Kampff) [![Gem Version](https://badge.fury.io/rb/voight_kampff.svg)](https://badge.fury.io/rb/voight_kampff) Voight-Kampff relies on a [user agent](http://en.wikipedia.org/wiki/User_agent) list for its detection. It can easily tell you if a request is coming from a crawler, spider or bot. This can be especially helpful in analytics such as page hit tracking. Installation ------------ `gem install voight_kampff` If you're using Rails and want to add `ActionDispatch::Request#bot?` and `ActionDispatch::Request#human?` methods, require `voight_kampff/rails`: ```Gemfile gem 'voight_kampff', require: 'voight_kampff/rails' ``` if you're using pure Rack, require it the following way: ```Gemfile gem 'voight_kampff', require: 'voight_kampff/rack' ``` Configuration ------------- A JSON file is used to match [user agent strings](http://simplyfast.info/browser) to a list of known bots. If you'd like to use an [updated list](https://github.com/monperrus/crawler-user-agents) or make your own customizations, run `rake voight_kampff:import_user_agents`. This will download a `crawler-user-agents.json` file into the `./config` directory. __Note:__ The pattern entries in the JSON file are evaluated as [regular expressions](http://en.wikipedia.org/wiki/Regular_expression). Usage ----- There are three ways to use Voight-Kampff 1. Through Rack::Request such as in your [Ruby on Rails](http://rubyonrails.org) controllers: `request.bot?` 2. Through the `VoightKampff` module: `VoightKampff.bot? 'your user agent string'` 3. Through a `VoightKampff::Test` instance: `VoightKampff::Test.new('your user agent string').bot?` All of the above examples accept `human?` and `bot?` methods. All of these methods will return `true` or `false`. Upgrading to version 1.0 ------------------------ Version 1.0 uses a new source for a list of bot user agent strings since the old source was no longer maintained. This new source, unfortuately, does not include as much detail. Therefore the following methods have been deprecated: - `#browser?` - `#checker?` - `#downloader?` - `#proxy?` - `#crawler?` - `#spam?` In general the `#bot?` command tends to include all of these and I'm sure it's unlikely that anybody was getting this granular with their bot checking. So I see it as a small price to pay for an open and up to date bot list. Also, the gem no longer extends `ActionDispatch::Request` instead it extends `Rack::Request` which `ActionDispatch::Request` inherits from. This allows the same functionality for Rails while opening the gem up to other rack-based projects. Upgrading to version 2.0 ------------------------ If you use Rails and `ActionDispatch::Request#bot?` and `ActionDispatch::Request#human?` methods, change your gemfile: ```diff -gem 'voight_kampff' +gem 'voight_kampff', require: 'voight_kampff/rails' ``` If you use Rack, change your gemfile: ```diff -gem 'voight_kampff' +gem 'voight_kampff', require: 'voight_kampff/rack' ``` FAQ --- __Q:__ __What's with the name?__ __A:__ It's the [machine in Blade Runner](https://en.wikipedia.org/wiki/Blade_Runner#Voight-Kampff_machine) that is used to test whether someone is a human or a replicant. __Q:__ __I've found a bot that isn't being matched__ __A:__ The list is being pulled from [github.com/monperrus/crawler-user-agents](https://github.com/monperrus/crawler-user-agents). If you'd like to have entries added to the list, please create a pull request with that project. Once that pull request is merged, feel free to create an issue here and I'll release a new gem version with the updated list. In the meantime you can always run `rake voight_kampff:import_user_agents` on your project to get that updated list. __Q:__ __Why don't you use the user agent list from ______________ If you know of a better source for a list of bot user agent strings, please create an issue and let me know. I'm open to switching to a better source or supporting multiple sources. There are others out there but I like the openness of monperrus' list. Thanks ------ Thanks to [github.com/monperrus/crawler-user-agents](https://github.com/monperrus/crawler-user-agents) for providing an open and easily updatable list of bot user agents. Contributing ------------ PR without tests will not get merged, Make sure you write tests for api and rails app. Feel free to ask for help, if you do not know how to write a determined test. Running Tests? -------------- - `bundle install` - `bundle exec rspec` biola-Voight-Kampff-a46158d/config.ru000066400000000000000000000002241440337637500174250ustar00rootroot00000000000000require 'rubygems' require 'bundler' Bundler.require :default, :development Combustion.initialize! :action_controller run Combustion::Application biola-Voight-Kampff-a46158d/config/000077500000000000000000000000001440337637500170575ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/config/crawler-user-agents.json000066400000000000000000003167421440337637500236610ustar00rootroot00000000000000[ { "pattern": "Googlebot\\/", "url": "http://www.google.com/bot.html", "instances": [ "Googlebot/2.1 (+http://www.google.com/bot.html)", "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36", "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview Analytics) Chrome/27.0.1453 Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" ] } , { "pattern": "Googlebot-Mobile", "instances": [ "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "Nokia6820/2.0 (4.83) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)", "SAMSUNG-SGH-E250/1.0 Profile/MIDP-2.0 Configuration/CLDC-1.1 UP.Browser/6.2.3.3.c.1.101 (GUI) MMP/2.0 (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)" ] } , { "pattern": "Googlebot-Image", "instances": [ "Googlebot-Image/1.0" ] } , { "pattern": "Googlebot-News", "instances": [ "Googlebot-News" ] } , { "pattern": "Googlebot-Video", "instances": [ "Googlebot-Video/1.0" ] } , { "pattern": "AdsBot-Google([^-]|$)", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "AdsBot-Google (+http://www.google.com/adsbot.html)" ] } , { "pattern": "AdsBot-Google-Mobile", "addition_date": "2017/08/21", "url": "https://support.google.com/adwords/answer/2404197", "instances": [ "AdsBot-Google-Mobile-Apps", "Mozilla/5.0 (Linux; Android 5.0; SM-G920A) AppleWebKit (KHTML, like Gecko) Chrome Mobile Safari (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)", "Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 (compatible; AdsBot-Google-Mobile; +http://www.google.com/mobile/adsbot.html)" ] } , { "pattern": "Feedfetcher-Google", "addition_date": "2018/06/27", "url": "https://support.google.com/webmasters/answer/178852", "instances": [ "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; 1 subscribers; feed-id=728742641706423)" ] } , { "pattern": "Mediapartners-Google", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "Mediapartners-Google", "Mozilla/5.0 (compatible; MSIE or Firefox mutant; not on Windows server;) Daumoa/4.0 (Following Mediapartners-Google)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 10_0 like Mac OS X; en-us) AppleWebKit/602.1.38 (KHTML, like Gecko) Version/10.0 Mobile/14A5297c Safari/602.1 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)", "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_1 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8B117 Safari/6531.22.7 (compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)" ] } , { "pattern": "Mediapartners \\(Googlebot\\)", "addition_date": "2017/08/08", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [] } , { "pattern": "APIs-Google", "addition_date": "2017/08/08", "url": "https://support.google.com/webmasters/answer/1061943?hl=en", "instances": [ "APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)" ] } , { "pattern": "bingbot", "url": "http://www.bing.com/bingbot.htm", "instances": [ "Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) SitemapProbe", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; adidxbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)", "Mozilla/5.0 (seoanalyzer; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" ] } , { "pattern": "Slurp", "url": "http://help.yahoo.com/help/us/ysearch/slurp", "instances": [ "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)", "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)", "Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)" ] } , { "pattern": "[wW]get", "instances": [ "WGETbot/1.0 (+http://wget.alanreed.org)", "Wget/1.14 (linux-gnu)" ] } , { "pattern": "curl", "instances": [ "eCairn-Grabber/1.0 (+http://ecairn.com/grabber) curl/7.15" ] } , { "pattern": "LinkedInBot", "instances": [ "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/3.1 +http://www.linkedin.com)", "LinkedInBot/1.0 (compatible; Mozilla/5.0; Jakarta Commons-HttpClient/4.3 +http://www.linkedin.com)" ] } , { "pattern": "Python-urllib", "instances": [ "Python-urllib/2.5", "Python-urllib/2.5", "Python-urllib/2.6", "Python-urllib/2.7", "Python-urllib/3.1", "Python-urllib/3.2", "Python-urllib/3.3", "Python-urllib/3.4", "Python-urllib/3.5", "Python-urllib/3.6" ] } , { "pattern": "python-requests", "addition_date": "2018/05/27", "instances": [ "python-requests/2.18.4" ] } , { "pattern": "libwww", "instances": [ "2Bone_LinkChecker/1.0 libwww-perl/6.03", "2Bone_LinkChkr/1.0 libwww-perl/6.03", "W3C-checklink/2.90 libwww-perl/5.64", "W3C-checklink/3.6.2.3 libwww-perl/5.64", "W3C-checklink/4.2 [4.20] libwww-perl/5.803", "W3C-checklink/4.2.1 [4.21] libwww-perl/5.803", "W3C-checklink/4.3 [4.42] libwww-perl/5.805", "W3C-checklink/4.3 [4.42] libwww-perl/5.808", "W3C-checklink/4.3 [4.42] libwww-perl/5.820", "W3C-checklink/4.5 [4.154] libwww-perl/5.823", "W3C-checklink/4.5 [4.160] libwww-perl/5.823", "amibot - http://www.amidalla.de - tech@amidalla.com libwww-perl/5.831" ] } , { "pattern": "httpunit", "instances": [ "httpunit/1.x" ] } , { "pattern": "nutch", "instances": [ "NutchCVS/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)", "istellabot-nutch/Nutch-1.10" ] } , { "pattern": "Go-http-client", "addition_date": "2016/03/26", "url": "https://golang.org/pkg/net/http/", "instances": [ "Go-http-client/1.1" ] } , { "pattern": "phpcrawl", "addition_date": "2012-09/17", "url": "http://phpcrawl.cuab.de/", "instances": [ "phpcrawl" ] } , { "pattern": "msnbot", "url": "http://search.msn.com/msnbot.htm", "instances": [ "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)", "adidxbot/2.0 (+http://search.msn.com/msnbot.htm)", "librabot/1.0 (+http://search.msn.com/msnbot.htm)", "librabot/2.0 (+http://search.msn.com/msnbot.htm)", "msnbot-NewsBlogs/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)", "msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)", "msnbot-media/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot/1.0 (+http://search.msn.com/msnbot.htm)", "msnbot/1.1 (+http://search.msn.com/msnbot.htm)", "msnbot/2.0b (+http://search.msn.com/msnbot.htm)", "msnbot/2.0b (+http://search.msn.com/msnbot.htm).", "msnbot/2.0b (+http://search.msn.com/msnbot.htm)._" ] } , { "pattern": "jyxobot", "instances": [] } , { "pattern": "FAST-WebCrawler", "instances": [ "FAST-WebCrawler/3.6/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)", "FAST-WebCrawler/3.8" ] } , { "pattern": "FAST Enterprise Crawler", "instances": [ "FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/", "FAST Enterprise Crawler 6 used by Schibsted (webcrawl@schibstedsok.no)" ] } , { "pattern": "BIGLOTRON", "instances": [ "BIGLOTRON (Beta 2;GNU/Linux)" ] } , { "pattern": "Teoma", "instances": [ "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://sp.ask.com/docs/about/tech_crawling.html)", "Mozilla/2.0 (compatible; Ask Jeeves/Teoma; +http://about.ask.com/en/docs/about/webmasters.shtml)" ], "url": "http://about.ask.com/en/docs/about/webmasters.shtml" } , { "pattern": "convera", "instances": [ "ConveraCrawler/0.9e (+http://ews.converasearch.com/crawl.htm)" ], "url": "http://ews.converasearch.com/crawl.htm" } , { "pattern": "seekbot", "instances": [ "Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2" ], "url": "http://www.seekbot.net/bot.html" } , { "pattern": "Gigabot", "instances": [ "Gigabot/1.0", "Gigabot/2.0 (http://www.gigablast.com/spider.html)" ], "url": "http://www.gigablast.com/spider.html" } , { "pattern": "Gigablast", "instances": [ "GigablastOpenSource/1.0" ], "url": "https://github.com/gigablast/open-source-search-engine" } , { "pattern": "exabot", "instances": [ "Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)", "Mozilla/5.0 (compatible; Exabot PyExalead/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot-Images/3.0; +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0 (BiggerBetter); +http://www.exabot.com/go/robot)", "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)" ] } , { "pattern": "ia_archiver", "instances": [ "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)", "ia_archiver-web.archive.org" ] } , { "pattern": "GingerCrawler", "instances": [ "GingerCrawler/1.0 (Language Assistant for Dyslexics; www.gingersoftware.com/crawler_agent.htm; support at ginger software dot com)" ] } , { "pattern": "webmon ", "instances": [] } , { "pattern": "HTTrack", "instances": [ "Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98)" ] } , { "pattern": "grub.org", "instances": [ "Mozilla/4.0 (compatible; grub-client-0.3.0; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.4; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.5; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.6; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.0.7; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.1.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.2.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.3.1; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.3.7; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.4.3; Crawl your own stuff with http://grub.org)", "Mozilla/4.0 (compatible; grub-client-1.5.3; Crawl your own stuff with http://grub.org)" ] } , { "pattern": "UsineNouvelleCrawler", "instances": [] } , { "pattern": "antibot", "instances": [] } , { "pattern": "netresearchserver", "instances": [] } , { "pattern": "speedy", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) Speedy Spider for SpeedyAds (http://www.entireweb.com/about/search_tech/speedy_spider/)", "Mozilla/5.0 (compatible; Speedy Spider; http://www.entireweb.com/about/search_tech/speedy_spider/)", "Speedy Spider (Entireweb; Beta/1.2; http://www.entireweb.com/about/search_tech/speedyspider/)", "Speedy Spider (http://www.entireweb.com/about/search_tech/speedy_spider/)" ] } , { "pattern": "fluffy", "instances": [] } , { "pattern": "bibnum.bnf", "instances": [ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://bibnum.bnf.fr/robot/bnf.html)" ] } , { "pattern": "findlink", "instances": [ "findlinks/1.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.3-beta8 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.3-beta9 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.5-beta7 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta1 (+http://wortschatz.uni-leipzig.de/findlinks/; YaCy 0.1; yacy.net)", "findlinks/1.1.6-beta2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta3 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta4 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/1.1.6-beta6 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.1 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.4 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.0.9 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1.3 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.1.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.2 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.5 (+http://wortschatz.uni-leipzig.de/findlinks/)", "findlinks/2.6 (+http://wortschatz.uni-leipzig.de/findlinks/)" ] } , { "pattern": "msrbot", "instances": [] } , { "pattern": "panscient", "instances": [ "panscient.com" ] } , { "pattern": "yacybot", "instances": [ "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.13.0-61-generic; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.8.0_111; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.19.0-15-generic; java 1.8.0_45-internal; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.2.0-4-amd64; java 1.7.0_67; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 4.4.0-57-generic; java 9-internal; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Windows 8 6.2; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 Windows 8.1 6.3; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 FreeBSD 10.3-RELEASE-p7; java 1.7.0_95; GMT/en) http://yacy.net/bot.html", "yacybot (/global; amd64 FreeBSD 10.3-RELEASE; java 1.8.0_77; GMT/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab093.4; java 1.7.0_65; Etc/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab094.8; java 1.7.0_79; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-042stab108.8; java 1.7.0_91; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 2.6.32-573.3.1.el6.x86_64; java 1.7.0_85; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.10.0-229.7.2.el7.x86_64; java 1.8.0_45; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.10.0-327.22.2.el7.x86_64; java 1.7.0_101; Etc/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.11.10-21-desktop; java 1.7.0_51; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.12.1; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-042stab093.4; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-45-generic; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-74-generic; java 1.7.0_91; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-83-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-85-generic; java 1.7.0_95; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.13.0-88-generic; java 1.7.0_101; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.14-0.bpo.1-amd64; java 1.7.0_55; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.14.32-xxxx-grs-ipv6-64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16-0.bpo.2-amd64; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_111; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; America/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_75; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/de) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_91; Europe/de) http://yacy.net/bot.html", "yacybot (-global; amd64 FreeBSD 9.2-RELEASE-p10; java 1.7.0_65; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab111.11; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 2.6.32-042stab116.1; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (-global; amd64 Linux 3.10.0-229.4.2.el7.x86_64; java 1.7.0_79; Europe/en) http://yacy.net/bot.html", "yacybot (/global; amd64 Linux 3.16.0-4-amd64; java 1.7.0_95; Europe/en) http://yacy.net/bot.html" ] } , { "pattern": "AISearchBot", "instances": [] } , { "pattern": "ips-agent", "instances": [ "BlackBerry9000/4.6.0.167 Profile/MIDP-2.0 Configuration/CLDC-1.1 VendorID/102 ips-agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.12; ips-agent) Gecko/20050922 Fedora/1.0.7-1.1.fc4 Firefox/1.0.7", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.3; ips-agent) Gecko/20090824 Fedora/1.0.7-1.1.fc4 Firefox/3.5.3", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.24; ips-agent) Gecko/20111107 Ubuntu/10.04 (lucid) Firefox/3.6.24", "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:14.0; ips-agent) Gecko/20100101 Firefox/14.0.1" ] } , { "pattern": "tagoobot", "instances": [] } , { "pattern": "MJ12bot", "instances": [ "MJ12bot/v1.2.0 (http://majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.4; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.2.5; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.0; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.2; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.3.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.0; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.1; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.2; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.3; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.4 (domain ownership verifier); http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.5; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.6; http://mj12bot.com/)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://www.majestic12.co.uk/bot.php?+)", "Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)" ] } , { "pattern": "woriobot", "instances": [ "Mozilla/5.0 (compatible; woriobot +http://worio.com)", "Mozilla/5.0 (compatible; woriobot support [at] zite [dot] com +http://zite.com)" ] } , { "pattern": "yanga", "instances": [ "Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)" ] } , { "pattern": "buzzbot", "instances": [ "Buzzbot/1.0 (Buzzbot; http://www.buzzstream.com; buzzbot@buzzstream.com)" ] } , { "pattern": "mlbot", "instances": [ "MLBot (www.metadatalabs.com/mlbot)" ] } , { "pattern": "YandexBot", "url": "http://yandex.com/bots", "instances": [ "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" ], "addition_date": "2015/04/14" } , { "pattern": "yandex.com\\/bots", "url": "https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml#robot-in-logs", "instances": [ "Mozilla/5.0 (compatible; YandexWebmaster/2.0; +http://yandex.com/bots)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)" ], "addition_date": "2016/12/01" } , { "pattern": "purebot", "addition_date": "2010/01/19", "instances": [] } , { "pattern": "Linguee Bot", "addition_date": "2010/01/26", "url": "http://www.linguee.com/bot", "instances": [ "Linguee Bot (http://www.linguee.com/bot)", "Linguee Bot (http://www.linguee.com/bot; bot@linguee.com)" ] } , { "pattern": "CyberPatrol", "addition_date": "2010/02/11", "url": "http://www.cyberpatrol.com/cyberpatrolcrawler.asp", "instances": [ "CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)" ] } , { "pattern": "voilabot", "addition_date": "2010/05/18", "instances": [ "Mozilla/5.0 (Windows NT 5.1; U; Win64; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)", "Mozilla/5.0 (compatible; OrangeBot/2.0; support.voilabot@orange.com)" ] } , { "pattern": "Baiduspider", "addition_date": "2010/07/15", "url": "http://www.baidu.jp/spider/", "instances": [ "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" ] } , { "pattern": "citeseerxbot", "addition_date": "2010/07/17", "instances": [] } , { "pattern": "spbot", "addition_date": "2010/07/31", "url": "http://www.seoprofiler.com/bot", "instances": [ "Mozilla/5.0 (compatible; spbot/1.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/1.2; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.1; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.2; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.3; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.0.4; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/2.0; +http://www.seoprofiler.com/bot/ )", "Mozilla/5.0 (compatible; spbot/2.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.0; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/3.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.1; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.2; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.3; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.4; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.5; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.6; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.7; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0.7; +https://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0.8; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0.9; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.0; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0a; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.0b; +http://www.seoprofiler.com/bot )", "Mozilla/5.0 (compatible; spbot/4.1.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.2.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.3.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.0; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.1; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/4.4.2; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.1; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.2; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0.3; +http://OpenLinkProfiler.org/bot )", "Mozilla/5.0 (compatible; spbot/5.0; +http://OpenLinkProfiler.org/bot )" ] } , { "pattern": "twengabot", "addition_date": "2010/08/03", "url": "http://www.twenga.com/bot.html", "instances": [] } , { "pattern": "postrank", "addition_date": "2010/08/03", "url": "http://www.postrank.com", "instances": [ "PostRank/2.0 (postrank.com)", "PostRank/2.0 (postrank.com; 1 subscribers)" ] } , { "pattern": "turnitinbot", "addition_date": "2010/09/26", "url": "http://www.turnitin.com", "instances": [] } , { "pattern": "scribdbot", "addition_date": "2010/09/28", "url": "http://www.scribd.com", "instances": [] } , { "pattern": "page2rss", "addition_date": "2010/10/07", "url": "http://www.page2rss.com", "instances": [ "Mozilla/5.0 (compatible; Page2RSS/0.7; +http://page2rss.com/)" ] } , { "pattern": "sitebot", "addition_date": "2010/12/15", "url": "http://www.sitebot.org", "instances": [ "Mozilla/5.0 (compatible; Whoiswebsitebot/0.1; +http://www.whoiswebsite.net)" ] } , { "pattern": "linkdex", "addition_date": "2011/01/06", "url": "http://www.linkdex.com", "instances": [ "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/about/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/about/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.1; +http://www.linkdex.com/bots/)", "Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)", "linkdex.com/v2.0", "linkdexbot/Nutch-1.0-dev (http://www.linkdex.com/; crawl at linkdex dot com)" ] } , { "pattern": "Adidxbot", "url": "http://onlinehelp.microsoft.com/en-us/bing/hh204496.aspx", "instances": [] } , { "pattern": "blekkobot", "url": "http://blekko.com/about/blekkobot", "instances": [ "Mozilla/5.0 (compatible; Blekkobot; ScoutJet; +http://blekko.com/about/blekkobot)" ] } , { "pattern": "ezooms", "addition_date": "2011/04/27", "url": "http://www.phpbb.com/community/viewtopic.php?f=64&t=935605&start=450#p12948289", "instances": [ "Mozilla/5.0 (compatible; Ezooms/1.0; ezooms.bot@gmail.com)" ] } , { "pattern": "dotbot", "addition_date": "2011/04/27", "instances": [ "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)", "dotbot" ] } , { "pattern": "Mail.RU_Bot", "addition_date": "2011/04/27", "instances": [ "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/", "Mozilla/5.0 (compatible; Mail.RU_Bot/2.0; +http://go.mail.ru/" ] } , { "pattern": "discobot", "addition_date": "2011/05/03", "url": "http://discoveryengine.com/discobot.html", "instances": [ "Mozilla/5.0 (compatible; discobot/1.0; +http://discoveryengine.com/discobot.html)", "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)", "mozilla/5.0 (compatible; discobot/1.1; +http://discoveryengine.com/discobot.html)" ] } , { "pattern": "heritrix", "addition_date": "2011/06/21", "url": "http://crawler.archive.org/", "instances": [ "Mozilla/5.0 (compatible; archive.org_bot/heritrix-1.15.4 +http://www.archive.org)", "Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.12.1b +http://netarkivet.dk/website/info.html)", "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://rjpower.org)", "Mozilla/5.0 (compatible; heritrix/1.14.2 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://archive.org)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.accelobot.com)", "Mozilla/5.0 (compatible; heritrix/1.14.3 +http://www.webarchiv.cz)", "Mozilla/5.0 (compatible; heritrix/1.14.3.r6601 +http://www.buddybuzz.net/yptrino)", "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://parsijoo.ir)", "Mozilla/5.0 (compatible; heritrix/1.14.4 +http://www.exif-search.com)", "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://aihit.com)", "Mozilla/5.0 (compatible; heritrix/2.0.2 +http://seekda.com)", "Mozilla/5.0 (compatible; heritrix/3.0.0-SNAPSHOT-20091120.021634 +http://crawler.archive.org)", "Mozilla/5.0 (compatible; heritrix/3.1.0-RC1 +http://boston.lti.cs.cmu.edu/crawler_12/)", "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://places.tomtom.com/crawlerinfo)", "Mozilla/5.0 (compatible; heritrix/3.1.1 +http://www.mixdata.com)", "Mozilla/5.0 (compatible; heritrix/3.1.1-SNAPSHOT-20120116.200628 +http://www.archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; heritrix/3.1.1; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.crim.ca)", "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.exif-search.com)", "Mozilla/5.0 (compatible; heritrix/3.2.0 +http://www.mixdata.com)", "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20140702-2247 +http://archive.org/details/archive.org_bot)", "Mozilla/5.0 (compatible; heritrix/3.3.0-SNAPSHOT-20160309-0050; UniLeipzigASV +http://corpora.informatik.uni-leipzig.de/crawler_faq.html)", "Mozilla/5.0 (compatible; sukibot_heritrix/3.1.1 +http://suki.ling.helsinki.fi/eng/webmasters.html)" ] } , { "pattern": "findthatfile", "addition_date": "2011/06/21", "url": "http://www.findthatfile.com/", "instances": [] } , { "pattern": "europarchive.org", "addition_date": "2011/06/21", "url": "", "instances": [ "Mozilla/5.0 (compatible; MSIE 7.0 +http://www.europarchive.org)" ] } , { "pattern": "NerdByNature.Bot", "addition_date": "2011/07/12", "url": "http://www.nerdbynature.net/bot", "instances": [ "Mozilla/5.0 (compatible; NerdByNature.Bot; http://www.nerdbynature.net/bot)" ] } , { "pattern": "sistrix crawler", "addition_date": "2011/08/02", "instances": [] } , { "pattern": "Ahrefs(Bot|SiteAudit)", "addition_date": "2011/08/28", "instances": [ "Mozilla/5.0 (compatible; AhrefsBot/5.2; News; +http://ahrefs.com/robot/)", "Mozilla/5.0 (compatible; AhrefsSiteAudit/5.2; +http://ahrefs.com/robot/)" ] } , { "pattern": "fuelbot", "addition_date": "2018/06/28", "instances": [ "fuelbot" ] } , { "pattern": "CrunchBot", "addition_date": "2018/06/28", "instances": [ "CrunchBot/1.0 (+http://www.leadcrunch.com/crunchbot)" ] } , { "pattern": "centurybot9", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Go-http-client/1.1; +centurybot9@gmail.com)" ] } , { "pattern": "IndeedBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0 (IndeedBot 1.1)" ] } , { "pattern": "mappydata", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Mappy/1.0; +http://mappydata.net/bot/)" ] } , { "pattern": "woobot", "addition_date": "2018/06/28", "instances": [ "woobot" ] } , { "pattern": "ZoominfoBot", "addition_date": "2018/06/28", "instances": [ "ZoominfoBot (zoominfobot at zoominfo dot com)" ] } , { "pattern": "PrivacyAwareBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; PrivacyAwareBot/1.1; +http://www.privacyaware.org)" ] } , { "pattern": "Multiviewbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Multiviewbot" ] } , { "pattern": "SWIMGBot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36 SWIMGBot" ] } , { "pattern": "Grobbot", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Grobbot/2.2; +https://grob.it)" ] } , { "pattern": "eright", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; eright/1.0; +bot@eright.com)" ] } , { "pattern": "Apercite", "addition_date": "2018/06/28", "instances": [ "Mozilla/5.0 (compatible; Apercite; +http://www.apercite.fr/robot/index.html)" ] } , { "pattern": "semanticbot", "addition_date": "2018/06/28", "instances": [ "semanticbot", "semanticbot (info@semanticaudience.com)" ] } , { "pattern": "Aboundex", "addition_date": "2011/09/28", "url": "http://www.aboundex.com/crawler/", "instances": [ "Aboundex/0.2 (http://www.aboundex.com/crawler/)", "Aboundex/0.3 (http://www.aboundex.com/crawler/)" ] } , { "pattern": "domaincrawler", "addition_date": "2011/10/21", "instances": [ "CipaCrawler/3.0 (info@domaincrawler.com; http://www.domaincrawler.com/www.example.com)" ] } , { "pattern": "wbsearchbot", "addition_date": "2011/12/21", "url": "http://www.warebay.com/bot.html", "instances": [] } , { "pattern": "summify", "addition_date": "2012/01/04", "url": "http://summify.com", "instances": [ "Summify (Summify/1.0.1; +http://summify.com)" ] } , { "pattern": "CCBot", "addition_date": "2012/02/05", "url": "http://www.commoncrawl.org/bot.html", "instances": [ "CCBot/2.0 (http://commoncrawl.org/faq/)" ] } , { "pattern": "edisterbot", "addition_date": "2012/02/25", "instances": [] } , { "pattern": "seznambot", "addition_date": "2012/03/14", "instances": [ "Mozilla/5.0 (compatible; SeznamBot/3.2-test1-1; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test1; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test2; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2-test4; +http://napoveda.seznam.cz/en/seznambot-intro/)", "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)" ] } , { "pattern": "ec2linkfinder", "addition_date": "2012/03/22", "instances": [ "ec2linkfinder" ] } , { "pattern": "gslfbot", "addition_date": "2012/04/03", "instances": [] } , { "pattern": "aiHitBot", "addition_date": "2012/04/16", "instances": [ "Mozilla/5.0 (compatible; aiHitBot/2.9; +https://www.aihitdata.com/about)" ] } , { "pattern": "intelium_bot", "addition_date": "2012/05/07", "instances": [] } , { "pattern": "facebookexternalhit", "addition_date": "2012/05/07", "instances": [ "facebookexternalhit/1.0 (+http://www.facebook.com/externalhit_uatext.php)", "facebookexternalhit/1.1", "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" ] } , { "pattern": "Yeti", "addition_date": "2012/05/07", "url": "http://naver.me/bot", "instances": [ "Mozilla/5.0 (compatible; Yeti/1.1; +http://naver.me/bot)" ] } , { "pattern": "RetrevoPageAnalyzer", "addition_date": "2012/05/07", "instances": [ "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; RetrevoPageAnalyzer; +http://www.retrevo.com/content/about-us)" ] } , { "pattern": "lb-spider", "addition_date": "2012/05/07", "instances": [] } , { "pattern": "Sogou", "addition_date": "2012/05/13", "url": "http://www.sogou.com/docs/help/webmasters.htm#07", "instances": [ "Sogou News Spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)", "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)" ] } , { "pattern": "lssbot", "addition_date": "2012/05/15", "instances": [] } , { "pattern": "careerbot", "addition_date": "2012/05/23", "url": "http://www.career-x.de/bot.html", "instances": [] } , { "pattern": "wotbox", "addition_date": "2012/06/12", "url": "http://www.wotbox.com", "instances": [ "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)", "Wotbox/2.01 (+http://www.wotbox.com/bot/)" ] } , { "pattern": "wocbot", "addition_date": "2012/07/25", "url": "http://www.wocodi.com/crawler", "instances": [] } , { "pattern": "ichiro", "addition_date": "2012/08/28", "url": "http://help.goo.ne.jp/help/article/1142", "instances": [ "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "DoCoMo/2.0 P900i(c100;TB;W24H11) (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "DoCoMo/2.0 P900i(c100;TB;W24H11)(compatible; ichiro/mobile goo;+http://help.goo.ne.jp/door/crawler.html)", "DoCoMo/2.0 P901i(c100;TB;W24H11) (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/door/crawler.html)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://help.goo.ne.jp/help/article/1142/)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo; +http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "KDDI-CA31 UP.Browser/6.2.0.7.3.129 (GUI) MMP/2.0 (compatible; ichiro/mobile goo;+http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "ichiro/2.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/2.0 (ichiro@nttr.co.jp)", "ichiro/3.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/3.0 (http://help.goo.ne.jp/help/article/1142)", "ichiro/3.0 (http://search.goo.ne.jp/option/use/sub4/sub4-1/)", "ichiro/4.0 (http://help.goo.ne.jp/door/crawler.html)", "ichiro/5.0 (http://help.goo.ne.jp/door/crawler.html)" ] } , { "pattern": "DuckDuckBot", "addition_date": "2012/09/19", "url": "http://duckduckgo.com/duckduckbot.html", "instances": [ "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)", "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)" ] } , { "pattern": "lssrocketcrawler", "addition_date": "2012/09/24", "instances": [] } , { "pattern": "drupact", "addition_date": "2012/09/27", "url": "http://www.arocom.de/drupact", "instances": [ "drupact/0.7; http://www.arocom.de/drupact" ] } , { "pattern": "webcompanycrawler", "addition_date": "2012/10/03", "instances": [] } , { "pattern": "acoonbot", "addition_date": "2012/10/07", "url": "http://www.acoon.de/robot.asp", "instances": [] } , { "pattern": "openindexspider", "addition_date": "2012/10/26", "url": "http://www.openindex.io/en/webmasters/spider.html", "instances": [] } , { "pattern": "gnam gnam spider", "addition_date": "2012/10/31", "instances": [] } , { "pattern": "web-archive-net.com.bot", "instances": [] } , { "pattern": "backlinkcrawler", "addition_date": "2013/01/04", "instances": [] } , { "pattern": "coccoc", "addition_date": "2013/01/04", "url": "http://help.coccoc.vn/", "instances": [ "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; coccoc/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; coccocbot-image/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)", "Mozilla/5.0 (compatible; image.coccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/)", "Mozilla/5.0 (compatible; imagecoccoc/1.0; +http://help.coccoc.com/searchengine)", "coccoc", "coccoc/1.0 ()", "coccoc/1.0 (http://help.coccoc.com/)", "coccoc/1.0 (http://help.coccoc.vn/)" ] } , { "pattern": "integromedb", "addition_date": "2013/01/10", "url": "http://www.integromedb.org/Crawler", "instances": [ "www.integromedb.org/Crawler" ] } , { "pattern": "content crawler spider", "addition_date": "2013/01/11", "instances": [] } , { "pattern": "toplistbot", "addition_date": "2013/02/05", "instances": [] } , { "pattern": "it2media-domain-crawler", "addition_date": "2013/03/12", "instances": [ "it2media-domain-crawler/1.0 on crawler-prod.it2media.de", "it2media-domain-crawler/2.0" ] } , { "pattern": "ip-web-crawler.com", "addition_date": "2013/03/22", "instances": [] } , { "pattern": "siteexplorer.info", "addition_date": "2013/05/01", "instances": [ "Mozilla/5.0 (compatible; SiteExplorer/1.0b; +http://siteexplorer.info/)", "Mozilla/5.0 (compatible; SiteExplorer/1.1b; +http://siteexplorer.info/Backlink-Checker-Spider/)" ] } , { "pattern": "elisabot", "addition_date": "2013/06/27", "instances": [] } , { "pattern": "proximic", "addition_date": "2013/09/12", "url": "http://www.proximic.com/info/spider.php", "instances": [ "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com)", "Mozilla/5.0 (compatible; proximic; +http://www.proximic.com/info/spider.php)" ] } , { "pattern": "changedetection", "addition_date": "2013/09/13", "url": "http://www.changedetection.com/bot.html", "instances": [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://www.changedetection.com/bot.html )" ] } , { "pattern": "arabot", "addition_date": "2013/10/09", "instances": [] } , { "pattern": "WeSEE:Search", "addition_date": "2013/11/18", "instances": [ "WeSEE:Search", "WeSEE:Search/0.1 (Alpha, http://www.wesee.com/en/support/bot/)" ] } , { "pattern": "niki-bot", "addition_date": "2014/01/01", "instances": [] } , { "pattern": "CrystalSemanticsBot", "addition_date": "2014/02/17", "url": "http://www.crystalsemantics.com/user-agent/", "instances": [] } , { "pattern": "rogerbot", "addition_date": "2014/02/28", "url": "http://moz.com/help/pro/what-is-rogerbot-", "instances": [ "Mozilla/5.0 (compatible; rogerBot/1.0; UrlCrawler; http://www.seomoz.org/dp/rogerbot)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+partager@moz.com)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+shiny@moz.com)", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com", "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)", "rogerbot/1.0 (http://www.moz.com/dp/rogerbot, rogerbot-crawler@moz.com)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler+shiny@seomoz.org)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-crawler@seomoz.org)", "rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot, rogerbot-wherecat@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr2-crawler-05@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-11@moz.com)", "rogerbot/1.1 (http://moz.com/help/guides/search-overview/crawl-diagnostics#more-help, rogerbot-crawler+pr4-crawler-15@moz.com)", "rogerbot/1.2 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+phaser-testing-crawler-01@moz.com)" ] } , { "pattern": "360Spider", "addition_date": "2014/03/14", "url": "http://needs-be.blogspot.co.uk/2013/02/how-to-block-spider360.html", "instances": [ "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1; 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)", "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36 QIHU 360SE; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; ) Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Firefox/1.5.0.11 360Spider;", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.0.11) Gecko/20070312 Firefox/1.5.0.11; 360Spider", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0); 360Spider(compatible; HaosouSpider; http://www.haosou.com/help/help_3_2.html)" ] } , { "pattern": "psbot", "addition_date": "2014/03/31", "url": "http://www.picsearch.com/bot.html", "instances": [ "psbot-image (+http://www.picsearch.com/bot.html)", "psbot-page (+http://www.picsearch.com/bot.html)", "psbot/0.1 (+http://www.picsearch.com/bot.html)" ] } , { "pattern": "InterfaxScanBot", "addition_date": "2014/03/31", "url": "http://scan-interfax.ru", "instances": [] } , { "pattern": "CC Metadata Scaper", "addition_date": "2014/04/01", "url": "http://wiki.creativecommons.org/Metadata_Scraper", "instances": [ "CC Metadata Scaper http://wiki.creativecommons.org/Metadata_Scraper" ] } , { "pattern": "g00g1e.net", "addition_date": "2014/04/01", "url": "http://www.g00g1e.net/", "instances": [] } , { "pattern": "GrapeshotCrawler", "addition_date": "2014/04/01", "url": "http://www.grapeshot.co.uk/crawler.php", "instances": [ "Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)" ] } , { "pattern": "urlappendbot", "addition_date": "2014/05/10", "url": "http://www.profound.net/urlappendbot.html", "instances": [ "Mozilla/5.0 (compatible; URLAppendBot/1.0; +http://www.profound.net/urlappendbot.html)" ] } , { "pattern": "brainobot", "addition_date": "2014/06/24", "instances": [] } , { "pattern": "fr-crawler", "addition_date": "2014/07/31", "instances": [ "Mozilla/5.0 (compatible; fr-crawler/1.1)" ] } , { "pattern": "binlar", "addition_date": "2014/09/12", "instances": [ "binlar_2.6.3 binlar2.6.3@unspecified.mail", "binlar_2.6.3 binlar_2.6.3@unspecified.mail", "binlar_2.6.3 larbin2.6.3@unspecified.mail", "binlar_2.6.3 phanendra_kalapala@McAfee.com", "binlar_2.6.3 test@mgmt.mic" ] } , { "pattern": "SimpleCrawler", "addition_date": "2014/09/12", "instances": [ "SimpleCrawler/0.1" ] } , { "pattern": "Twitterbot", "addition_date": "2014/09/12", "url": "https://dev.twitter.com/cards/getting-started", "instances": [ "Twitterbot/0.1", "Twitterbot/1.0" ] } , { "pattern": "cXensebot", "addition_date": "2014/10/05", "instances": [ "cXensebot/1.1a" ], "url": "http://www.cxense.com/bot.html" } , { "pattern": "smtbot", "addition_date": "2014/10/04", "instances": [ "Mozilla/5.0 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)", "SMTBot (similartech.com/smtbot)" ], "url": "http://www.similartech.com/smtbot" } , { "pattern": "bnf.fr_bot", "addition_date": "2014/11/18", "url": "http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html", "instances": [ "Mozilla/5.0 (compatible; bnf.fr_bot; +http://www.bnf.fr/fr/outils/a.dl_web_capture_robot.html)" ] } , { "pattern": "A6-Indexer", "addition_date": "2014/12/05", "url": "http://www.a6corp.com/a6-web-scraping-policy/", "instances": [ "A6-Indexer" ] } , { "pattern": "ADmantX", "addition_date": "2014/12/05", "url": "http://www.admantx.com", "instances": [ "ADmantX Platform Semantic Analyzer - ADmantX Inc. - www.admantx.com - support@admantx.com" ] } , { "pattern": "Facebot", "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl", "addition_date": "2014/12/30", "instances": [ "Facebot/1.0" ] } , { "pattern": "OrangeBot\\/", "instances": [ "Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com" ], "addition_date": "2015/01/12" } , { "pattern": "memorybot", "url": "http://mignify.com/bot.htm", "instances": [ "Mozilla/5.0 (compatible; memorybot/1.21.14 +http://mignify.com/bot.html)" ], "addition_date": "2015/02/01" } , { "pattern": "AdvBot", "url": "http://advbot.net/bot.html", "instances": [ "Mozilla/5.0 (compatible; AdvBot/2.0; +http://advbot.net/bot.html)" ], "addition_date": "2015/02/01" } , { "pattern": "MegaIndex", "url": "https://www.megaindex.ru/?tab=linkAnalyze", "instances": [ "Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +https://www.megaindex.ru/?tab=linkAnalyze)" ], "addition_date": "2015/03/28" } , { "pattern": "SemanticScholarBot", "url": "http://s2.allenai.org/bot.html", "instances": [ "SemanticScholarBot/1.0 (+http://s2.allenai.org/bot.html)" ], "addition_date": "2015/03/28" } , { "pattern": "ltx71", "url": "http://ltx71.com/", "instances": [ "ltx71 - (http://ltx71.com/)" ], "addition_date": "2015/04/04" } , { "pattern": "nerdybot", "url": "http://nerdybot.com/", "instances": [ "nerdybot" ], "addition_date": "2015/04/05" } , { "pattern": "xovibot", "url": "http://www.xovibot.net/", "instances": [ "Mozilla/5.0 (compatible; XoviBot/2.0; +http://www.xovibot.net/)" ], "addition_date": "2015/04/05" } , { "pattern": "BUbiNG", "url": "http://law.di.unimi.it/BUbiNG.html", "instances": [ "BUbiNG (+http://law.di.unimi.it/BUbiNG.html)" ], "addition_date": "2015/04/06" } , { "pattern": "Qwantify", "url": "https://www.qwant.com/", "instances": [ "Mozilla/5.0 (compatible; Qwantify/2.0n; +https://www.qwant.com/)/*" ], "addition_date": "2015/04/06" } , { "pattern": "archive.org_bot", "url": "http://www.archive.org/details/archive.org_bot", "instances": [ "Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)" ], "addition_date": "2015/04/14" } , { "pattern": "Applebot", "url": "http://www.apple.com/go/applebot", "addition_date": "2015/04/15", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e Safari/8536.25 (compatible; Applebot/0.3; +http://www.apple.com/go/applebot)", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B410 Safari/600.1.4 (Applebot/0.1; +http://www.apple.com/go/applebot)" ] } , { "pattern": "TweetmemeBot", "url": "http://datasift.com/bot.html", "instances": [ "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0" ], "addition_date": "2015/04/15" } , { "pattern": "crawler4j", "url": "https://github.com/yasserg/crawler4j", "instances": [ "crawler4j (http://code.google.com/p/crawler4j/)" ], "addition_date": "2015/05/07" } , { "pattern": "findxbot", "url": "http://www.findxbot.com", "instances": [ "Mozilla/5.0 (compatible; Findxbot/1.0; +http://www.findxbot.com)" ], "addition_date": "2015/05/07" } , { "pattern": "S[eE][mM]rushBot", "url": "http://www.semrush.com/bot.html", "instances": [ "Mozilla/5.0 (compatible; SemrushBot/0.98~bl; +http://www.semrush.com/bot.html)", "SEMrushBot" ], "addition_date": "2015/05/26" } , { "pattern": "yoozBot", "url": "http://yooz.ir", "instances": [ "Mozilla/5.0 (compatible; yoozBot-2.2; http://yooz.ir; info@yooz.ir)" ], "addition_date": "2015/05/26" } , { "pattern": "lipperhey", "url": "http://www.lipperhey.com/", "instances": [ "Mozilla/5.0 (compatible; Lipperhey Link Explorer; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey SEO Service; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey Site Explorer; http://www.lipperhey.com/)", "Mozilla/5.0 (compatible; Lipperhey-Kaus-Australis/5.0; +https://www.lipperhey.com/en/about/)" ], "addition_date": "2015/08/26" } , { "pattern": "Y!J", "url": "https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/%E3%82%A6%E3%82%A7%E3%83%96%E3%83%9A%E3%83%BC%E3%82%B8%E3%81%AB%E3%82%A2%E3%82%AF%E3%82%BB%E3%82%B9%E3%81%99%E3%82%8B%E3%82%B7%E3%82%B9%E3%83%86%E3%83%A0%E3%81%AE%E3%83%A6%E3%83%BC%E3%82%B6%E3%83%BC%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88%E3%81%AB%E3%81%A4%E3%81%84%E3%81%A6", "instances": [ "Y!J-ASR/0.1 crawler (http://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/)", "Y!J-BRJ/YATS crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Y!J-PSC/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Y!J-BRW/1.0 crawler (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Mozilla/5.0 (iPhone; Y!J-BRY/YATSH crawler; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)", "Mozilla/5.0 (compatible; Y!J SearchMonkey/1.0 (Y!J-AGENT; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html))" ], "addition_date": "2015/05/26" } , { "pattern": "Domain Re-Animator Bot", "url": "http://domainreanimator.com", "instances": [ "Domain Re-Animator Bot (http://domainreanimator.com) - support@domainreanimator.com" ], "addition_date": "2015/04/14" } , { "pattern": "AddThis", "url": "https://www.addthis.com", "instances": [ "AddThis.com robot tech.support@clearspring.com" ], "addition_date": "2015/06/02" } , { "pattern": "Screaming Frog SEO Spider", "url": "http://www.screamingfrog.co.uk/seo-spider", "instances": [ "Screaming Frog SEO Spider/5.1" ], "addition_date": "2016/01/08" } , { "pattern": "MetaURI", "url": "http://www.useragentstring.com/MetaURI_id_17683.php", "instances": [ "MetaURI API/2.0 +metauri.com" ], "addition_date": "2016/01/02" } , { "pattern": "Scrapy", "url": "http://scrapy.org/", "instances": [ "Scrapy/1.0.3 (+http://scrapy.org)" ], "addition_date": "2016/01/02" } , { "pattern": "Livelap[bB]ot", "url": "http://site.livelap.com/crawler", "instances": [ "LivelapBot/0.2 (http://site.livelap.com/crawler)", "Livelapbot/0.1" ], "addition_date": "2016/01/02" } , { "pattern": "OpenHoseBot", "url": "http://www.openhose.org/bot.html", "instances": [ "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)" ], "addition_date": "2016/01/02" } , { "pattern": "CapsuleChecker", "url": "http://www.capsulink.com/about", "instances": [ "CapsuleChecker (http://www.capsulink.com/)" ], "addition_date": "2016/01/02" } , { "pattern": "collection@infegy.com", "url": "http://infegy.com/", "instances": [ "Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com" ], "addition_date": "2016/01/03" } , { "pattern": "IstellaBot", "url": "http://www.tiscali.it/", "instances": [ "Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)" ], "addition_date": "2016/01/09" } , { "pattern": "DeuSu\\/", "addition_date": "2016/01/23", "url": "https://deusu.de/robot.html", "instances": [ "Mozilla/5.0 (compatible; DeuSu/0.1.0; +https://deusu.org)", "Mozilla/5.0 (compatible; DeuSu/5.0.2; +https://deusu.de/robot.html)" ] } , { "pattern": "betaBot", "addition_date": "2016/01/23", "instances": [] } , { "pattern": "Cliqzbot\\/", "addition_date": "2016/01/23", "url": "http://cliqz.com/company/cliqzbot", "instances": [ "Cliqzbot/0.1 (+http://cliqz.com +cliqzbot@cliqz.com)", "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)", "Mozilla/5.0 (compatible; Cliqzbot/0.1 +http://cliqz.com/company/cliqzbot)", "Mozilla/5.0 (compatible; Cliqzbot/1.0 +http://cliqz.com/company/cliqzbot)" ] } , { "pattern": "MojeekBot\\/", "addition_date": "2016/01/23", "url": "https://www.mojeek.com/bot.html", "instances": [ "MojeekBot/0.2 (archi; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html#relaunch)", "Mozilla/5.0 (compatible; MojeekBot/0.2; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.5; http://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.6; +https://www.mojeek.com/bot.html)", "Mozilla/5.0 (compatible; MojeekBot/0.6; http://www.mojeek.com/bot.html)" ] } , { "pattern": "netEstate NE Crawler", "addition_date": "2016/01/23", "url": "+http://www.website-datenbank.de/", "instances": [ "netEstate NE Crawler (+http://www.sengine.info/)", "netEstate NE Crawler (+http://www.website-datenbank.de/)" ] } , { "pattern": "SafeSearch microdata crawler", "addition_date": "2016/01/23", "url": "https://safesearch.avira.com", "instances": [ "SafeSearch microdata crawler (https://safesearch.avira.com, safesearch-abuse@avira.com)" ] } , { "pattern": "Gluten Free Crawler\\/", "addition_date": "2016/01/23", "url": "http://glutenfreepleasure.com/", "instances": [ "Mozilla/5.0 (compatible; Gluten Free Crawler/1.0; +http://glutenfreepleasure.com/)" ] } , { "pattern": "Sonic", "addition_date": "2016/02/08", "url": "http://www.yama.info.waseda.ac.jp/~crawler/info.html", "instances": [ "Mozilla/5.0 (compatible; RankSonicSiteAuditor/1.0; +https://ranksonic.com/ranksonic_sab.html)", "Mozilla/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)", "Mozzila/5.0 (compatible; Sonic/1.0; http://www.yama.info.waseda.ac.jp/~crawler/info.html)" ] } , { "pattern": "Sysomos", "addition_date": "2016/02/08", "url": "http://www.sysomos.com", "instances": [ "Mozilla/5.0 (compatible; Sysomos/1.0; +http://www.sysomos.com/; Sysomos)" ] } , { "pattern": "Trove", "addition_date": "2016/02/08", "url": "http://www.trove.com", "instances": [] } , { "pattern": "deadlinkchecker", "addition_date": "2016/02/08", "url": "http://www.deadlinkchecker.com", "instances": [ "www.deadlinkchecker.com Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36", "www.deadlinkchecker.com XMLHTTP/1.0", "www.deadlinkchecker.com XMLHTTP/1.0 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36" ] } , { "pattern": "Slack-ImgProxy", "addition_date": "2016/04/25", "url": "https://api.slack.com/robots", "instances": [ "Slack-ImgProxy (+https://api.slack.com/robots)", "Slack-ImgProxy 0.59 (+https://api.slack.com/robots)", "Slack-ImgProxy 0.66 (+https://api.slack.com/robots)", "Slack-ImgProxy 1.106 (+https://api.slack.com/robots)", "Slack-ImgProxy 1.138 (+https://api.slack.com/robots)", "Slack-ImgProxy 149 (+https://api.slack.com/robots)" ] } , { "pattern": "Embedly", "addition_date": "2016/04/25", "url": "http://support.embed.ly", "instances": [ "Embedly +support@embed.ly", "Mozilla/5.0 (compatible; Embedly/0.2; +http://support.embed.ly/)", "Mozilla/5.0 (compatible; Embedly/0.2; snap; +http://support.embed.ly/)" ] } , { "pattern": "RankActiveLinkBot", "addition_date": "2016/06/20", "url": "https://rankactive.com/resources/rankactive-linkbot", "instances": [ "Mozilla/5.0 (compatible; RankActiveLinkBot; +https://rankactive.com/resources/rankactive-linkbot)" ] } , { "pattern": "iskanie", "addition_date": "2016/09/02", "url": "http://www.iskanie.com", "instances": [ "iskanie (+http://www.iskanie.com)" ] } , { "pattern": "SafeDNSBot", "addition_date": "2016/09/10", "url": "https://www.safedns.com/searchbot", "instances": [ "SafeDNSBot (https://www.safedns.com/searchbot)" ] } , { "pattern": "SkypeUriPreview", "addition_date": "2016/10/10", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) SkypeUriPreview Preview/0.5" ] } , { "pattern": "Veoozbot", "addition_date": "2016/11/03", "url": "http://www.veooz.com/veoozbot.html", "instances": [ "Mozilla/5.0 (compatible; Veoozbot/1.0; +http://www.veooz.com/veoozbot.html)" ] } , { "pattern": "Slackbot", "addition_date": "2016/11/03", "url": "https://api.slack.com/robots", "instances": [ "Slackbot-LinkExpanding (+https://api.slack.com/robots)", "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)" ] } , { "pattern": "redditbot", "addition_date": "2016/11/03", "url": "http://www.reddit.com/feedback", "instances": [ "Mozilla/5.0 (compatible; redditbot/1.0; +http://www.reddit.com/feedback)" ] } , { "pattern": "datagnionbot", "addition_date": "2016/11/03", "url": "http://www.datagnion.com/bot.html", "instances": [ "datagnionbot (+http://www.datagnion.com/bot.html)" ] } , { "pattern": "Google-Adwords-Instant", "addition_date": "2016/11/03", "url": "http://www.google.com/adsbot.html", "instances": [ "Google-Adwords-Instant (+http://www.google.com/adsbot.html)" ] } , { "pattern": "adbeat_bot", "addition_date": "2016/11/04", "instances": [ "Mozilla/5.0 (compatible; adbeat_bot; +support@adbeat.com; support@adbeat.com)", "adbeat_bot" ] } , { "pattern": "WhatsApp", "addition_date": "2016/11/15", "url": "https://www.whatsapp.com/", "instances": [ "WhatsApp", "WhatsApp/2.12.15/i", "WhatsApp/2.12.16/i", "WhatsApp/2.12.17/i", "WhatsApp/2.12.449 A", "WhatsApp/2.12.453 A", "WhatsApp/2.12.510 A", "WhatsApp/2.12.540 A", "WhatsApp/2.12.548 A", "WhatsApp/2.12.555 A", "WhatsApp/2.12.556 A", "WhatsApp/2.16.1/i", "WhatsApp/2.16.13 A", "WhatsApp/2.16.2/i", "WhatsApp/2.16.42 A", "WhatsApp/2.16.57 A" ] } , { "pattern": "contxbot", "addition_date": "2017/02/25", "instances": [ "Mozilla/5.0 (compatible;contxbot/1.0)" ] } , { "pattern": "pinterest", "addition_date": "2017/03/03", "instances": [ "Pinterest/0.2 (+http://www.pinterest.com/bot.html)" ], "url": "http://www.pinterest.com/bot.html" } , { "pattern": "electricmonk", "addition_date": "2017/03/04", "instances": [ "Mozilla/5.0 (compatible; electricmonk/3.2.0 +https://www.duedil.com/our-crawler/)" ], "url": "https://www.duedil.com/our-crawler/" } , { "pattern": "GarlikCrawler", "addition_date": "2017/03/18", "instances": [ "GarlikCrawler/1.2 (http://garlik.com/, crawler@garlik.com)" ], "url": "http://garlik.com/" } , { "pattern": "BingPreview\\/", "addition_date": "2017/04/23", "url": "https://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0", "instances": [ "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b", "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; BingPreview/1.0b) like Gecko", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Trident/6.0; WOW64; Trident/6.0; BingPreview/1.0b)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; WOW64; Trident/5.0; BingPreview/1.0b)", "Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 BingPreview/1.0b" ] } , { "pattern": "vebidoobot", "addition_date": "2017/05/08", "instances": [ "Mozilla/5.0 (compatible; vebidoobot/1.0; +https://blog.vebidoo.de/vebidoobot/" ], "url": "https://blog.vebidoo.de/vebidoobot/" } , { "pattern": "FemtosearchBot", "addition_date": "2017/05/16", "instances": [ "Mozilla/5.0 (compatible; FemtosearchBot/1.0; http://femtosearch.com)" ], "url": "http://femtosearch.com" } , { "pattern": "Yahoo Link Preview", "addition_date": "2017/06/28", "instances": [ "Mozilla/5.0 (compatible; Yahoo Link Preview; https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html)" ], "url": "https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html" } , { "pattern": "MetaJobBot", "addition_date": "2017/08/16", "instances": [ "Mozilla/5.0 (compatible; MetaJobBot; http://www.metajob.de/crawler)" ], "url": "http://www.metajob.de/the/crawler" } , { "pattern": "DomainStatsBot", "addition_date": "2017/08/16", "instances": [ "DomainStatsBot/1.0 (http://domainstats.io/our-bot)" ], "url": "http://domainstats.io/our-bot" } , { "pattern": "mindUpBot", "addition_date": "2017/08/16", "instances": [ "mindUpBot (datenbutler.de)" ], "url": "http://www.datenbutler.de/" } , { "pattern": "Daum\\/", "addition_date": "2017/08/16", "instances": [ "Mozilla/5.0 (compatible; Daum/4.1; +http://cs.daum.net/faq/15/4118.html?faqId=28966)" ], "url": "http://cs.daum.net/faq/15/4118.html?faqId=28966" } , { "pattern": "Jugendschutzprogramm-Crawler", "addition_date": "2017/08/16", "instances": [ "Jugendschutzprogramm-Crawler; Info: http://www.jugendschutzprogramm.de" ], "url": "http://www.jugendschutzprogramm.de" } , { "pattern": "Xenu Link Sleuth", "addition_date": "2017/08/19", "instances": [ "Xenu Link Sleuth/1.3.8" ], "url": "http://home.snafu.de/tilman/xenulink.html" } , { "pattern": "Pcore-HTTP", "addition_date": "2017/08/19", "instances": [ "Pcore-HTTP/v0.40.3" ], "url": "https://bitbucket.org/softvisio/pcore/overview" } , { "pattern": "moatbot", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36 moatbot", "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0 Mobile/12A4345d Safari/600.1.4 moatbot" ], "url": "https://moat.com" } , { "pattern": "KosmioBot", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36 (compatible; KosmioBot/1.0; +http://kosm.io/bot.html)" ], "url": "http://kosm.io/bot.html" } , { "pattern": "pingdom", "addition_date": "2017/09/16", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36 PingdomPageSpeed/1.0 (pingbot/2.0; +http://www.pingdom.com/)", "Mozilla/5.0 (compatible; pingbot/2.0; +http://www.pingdom.com/)" ], "url": "http://www.pingdom.com" } , { "pattern": "PhantomJS", "addition_date": "2017/09/18", "instances": [ "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1 bl.uk_lddc_renderbot/2.0.0 (+ http://www.bl.uk/aboutus/legaldeposit/websites/websites/faqswebmaster/index.html)" ], "url": "http://phantomjs.org/" } , { "pattern": "Gowikibot", "addition_date": "2017/10/26", "instances": [ "Mozilla/5.0 (compatible; Gowikibot/1.0; +http://www.gowikibot.com)" ], "url": "http://www.gowikibot.com" } , { "pattern": "PiplBot", "addition_date": "2017/10/30", "instances": [ "Mozilla/5.0+(compatible;+PiplBot;+http://www.pipl.com/bot/)" ], "url": "http://www.pipl.com/bot/" } , { "pattern": "Discordbot", "addition_date": "2017/09/22", "url": "https://discordapp.com", "instances": [ "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)" ] } , { "pattern": "TelegramBot", "addition_date": "2017/10/01", "instances": [ "TelegramBot (like TwitterBot)" ] } , { "pattern": "Jetslide", "addition_date": "2017/09/27", "url": "http://jetsli.de/crawler", "instances": [ "Mozilla/5.0 (compatible; Jetslide; +http://jetsli.de/crawler)" ] } , { "pattern": "newsharecounts", "addition_date": "2017/09/30", "url": "http://newsharecounts.com/crawler", "instances": [ "Mozilla/5.0 (compatible; NewShareCounts.com/1.0; +http://newsharecounts.com/crawler)" ] } , { "pattern": "James BOT", "addition_date": "2017/10/12", "url": "http://cognitiveseo.com/bot.html", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6 - James BOT - WebCrawler http://cognitiveseo.com/bot.html" ] } , { "pattern": "Barkrowler", "addition_date": "2017/10/09", "url": "http://www.exensa.com/crawl", "instances": [ "Barkrowler/0.5.1 (experimenting / debugging - sorry for your logs ) http://www.exensa.com/crawl - admin@exensa.com -- based on BuBiNG", "Barkrowler/0.7 (+http://www.exensa.com/crawl)" ] } , { "pattern": "TinEye", "addition_date": "2017/10/14", "url": "http://www.tineye.com/crawler.html", "instances": [ "Mozilla/5.0 (compatible; TinEye-bot/1.31; +http://www.tineye.com/crawler.html)", "TinEye/1.1 (http://tineye.com/crawler.html)" ] } , { "pattern": "SocialRankIOBot", "addition_date": "2017/10/19", "url": "http://socialrank.io/about", "instances": [ "SocialRankIOBot; http://socialrank.io/about" ] } , { "pattern": "trendictionbot", "addition_date": "2017/10/30", "url": "http://www.trendiction.de/bot", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11" ] } , { "pattern": "Ocarinabot", "addition_date": "2017/09/27", "instances": [ "Ocarinabot" ] } , { "pattern": "epicbot", "addition_date": "2017/10/31", "url": "http://www.epictions.com/epicbot", "instances": [ "Mozilla/5.0 (compatible; epicbot; +http://www.epictions.com/epicbot)" ] } , { "pattern": "Primalbot", "addition_date": "2017/09/27", "url": "https://www.primal.com", "instances": [ "Mozilla/5.0 (compatible; Primalbot; +https://www.primal.com;)" ] } , { "pattern": "DuckDuckGo-Favicons-Bot", "addition_date": "2017/10/06", "url": "http://duckduckgo.com", "instances": [ "Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)" ] } , { "pattern": "GnowitNewsbot", "addition_date": "2017/10/30", "url": "http://www.gnowit.com", "instances": [ "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0 / GnowitNewsbot / Contact information at http://www.gnowit.com" ] } , { "pattern": "Leikibot", "addition_date": "2017/09/24", "url": "http://www.leiki.com", "instances": [ "Mozilla/5.0 (Windows NT 6.3;compatible; Leikibot/1.0; +http://www.leiki.com)" ] } , { "pattern": "LinkArchiver", "addition_date": "2017/09/24", "instances": [ "@LinkArchiver twitter bot" ] } , { "pattern": "YaK\\/", "addition_date": "2017/09/25", "url": "http://linkfluence.com", "instances": [ "Mozilla/5.0 (compatible; YaK/1.0; http://linkfluence.com/; bot@linkfluence.com)" ] } , { "pattern": "PaperLiBot", "addition_date": "2017/09/25", "url": "http://support.paper.li/entries/20023257-what-is-paper-li", "instances": [ "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)" ] } , { "pattern": "Digg Deeper", "addition_date": "2017/09/26", "url": "http://digg.com/about", "instances": [ "Digg Deeper/v1 (http://digg.com/about)" ] } , { "pattern": "dcrawl", "addition_date": "2017/09/22", "instances": [ "dcrawl/1.0" ] } , { "pattern": "Snacktory", "addition_date": "2017/09/23", "url": "https://github.com/karussell/snacktory", "instances": [ "Mozilla/5.0 (compatible; Snacktory; +https://github.com/karussell/snacktory)" ] } , { "pattern": "AndersPinkBot", "addition_date": "2017/09/24", "url": "http://anderspink.com/bot.html", "instances": [ "Mozilla/5.0 (compatible; AndersPinkBot/1.0; +http://anderspink.com/bot.html)" ] } , { "pattern": "Fyrebot", "addition_date": "2017/09/22", "instances": [ "Fyrebot/1.0" ] } , { "pattern": "EveryoneSocialBot", "addition_date": "2017/09/22", "url": "http://everyonesocial.com", "instances": [ "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)" ] } , { "pattern": "Mediatoolkitbot", "addition_date": "2017/10/06", "url": "http://mediatoolkit.com", "instances": [ "Mediatoolkitbot (complaints@mediatoolkit.com)" ] } , { "pattern": "Luminator-robots", "addition_date": "2017/09/22", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/30.0.1599.66 Safari/537.13 Luminator-robots/2.0" ] } , { "pattern": "ExtLinksBot", "addition_date": "2017/11/02", "url": "https://extlinks.com/Bot.html", "instances": [ "Mozilla/5.0 (compatible; ExtLinksBot/1.5 +https://extlinks.com/Bot.html)" ] } , { "pattern": "SurveyBot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.13) Gecko/2009073022 Firefox/3.5.2 (.NET CLR 3.5.30729) SurveyBot/2.3 (DomainTools)" ] } , { "pattern": "NING\\/", "addition_date": "2017/11/02", "instances": [ "NING/1.0" ] } , { "pattern": "okhttp", "addition_date": "2017/11/02", "instances": [ "okhttp/2.5.0", "okhttp/2.7.5", "okhttp/3.2.0", "okhttp/3.5.0" ] } , { "pattern": "Nuzzel", "addition_date": "2017/11/02", "instances": [ "Nuzzel" ] } , { "pattern": "omgili", "addition_date": "2017/11/02", "url": "http://omgili.com", "instances": [ "omgili/0.5 +http://omgili.com" ] } , { "pattern": "PocketParser", "addition_date": "2017/11/02", "url": "https://getpocket.com/pocketparser_ua", "instances": [ "PocketParser/2.0 (+https://getpocket.com/pocketparser_ua)" ] } , { "pattern": "YisouSpider", "addition_date": "2017/11/02", "instances": [ "YisouSpider" ] } , { "pattern": "um-LN", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (compatible; um-LN/1.0; mailto: techinfo@ubermetrics-technologies.com)" ] } , { "pattern": "ToutiaoSpider", "addition_date": "2017/11/02", "url": "http://web.toutiao.com/media_cooperation/", "instances": [ "Mozilla/5.0 (compatible; ToutiaoSpider/1.0; http://web.toutiao.com/media_cooperation/;)" ] } , { "pattern": "MuckRack", "addition_date": "2017/11/02", "url": "http://muckrack.com", "instances": [ "Mozilla/5.0 (compatible; MuckRack/1.0; +http://muckrack.com)" ] } , { "pattern": "Jamie's Spider", "addition_date": "2017/11/02", "url": "http://jamiembrown.com/", "instances": [ "Jamie's Spider (http://jamiembrown.com/)" ] } , { "pattern": "AHC\\/", "addition_date": "2017/11/02", "instances": [ "AHC/2.0" ] } , { "pattern": "NetcraftSurveyAgent", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (compatible; NetcraftSurveyAgent/1.0; +info@netcraft.com)" ] } , { "pattern": "Laserlikebot", "addition_date": "2017/11/02", "instances": [ "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12F70 Safari/600.1.4 (compatible; Laserlikebot/0.1)" ] } , { "pattern": "Apache-HttpClient", "addition_date": "2017/11/02", "instances": [ "Apache-HttpClient/4.2.3 (java 1.5)", "Apache-HttpClient/4.2.5 (java 1.5)", "Apache-HttpClient/4.3.1 (java 1.5)", "Apache-HttpClient/4.3.3 (java 1.5)", "Apache-HttpClient/4.3.5 (java 1.5)", "Apache-HttpClient/4.4.1 (Java/1.8.0_65)", "Apache-HttpClient/4.5.3 (Java/1.8.0_121)" ] } , { "pattern": "AppEngine-Google", "addition_date": "2017/11/02", "instances": [ "AppEngine-Google; (+http://code.google.com/appengine; appid: example)" ] } , { "pattern": "Jetty", "addition_date": "2017/11/02", "instances": [ "Jetty/9.3.z-SNAPSHOT" ] } , { "pattern": "Upflow", "addition_date": "2017/11/02", "instances": [ "Upflow/1.0" ] } , { "pattern": "Thinklab", "addition_date": "2017/11/02", "url": "thinklab.com", "instances": [ "Thinklab (thinklab.com)" ] } , { "pattern": "Traackr.com", "addition_date": "2017/11/02", "url": "Traackr.com", "instances": [ "Traackr.com" ] } , { "pattern": "Twurly", "addition_date": "2017/11/02", "url": "http://twurly.org", "instances": [ "Ruby, Twurly v1.1 (http://twurly.org)" ] } , { "pattern": "Mastodon", "addition_date": "2017/11/02", "instances": [ "http.rb/2.2.2 (Mastodon/1.5.1; +https://example-masto-instance.org/)" ] } , { "pattern": "http_get", "addition_date": "2017/11/02", "instances": [ "http_get" ] } , { "pattern": "DnyzBot", "addition_date": "2017/11/20", "instances": [ "Mozilla/5.0 (compatible; DnyzBot/1.0)", "Mozilla/5.0 (compatible; DnyzBot/1.0) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/64.0.3282.167 Safari/537.36", "Mozilla/5.0 (compatible; DnyzBot/1.0) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/64.0.3264.0 Safari/537.36" ] } , { "pattern": "botify", "addition_date": "2018/02/01", "instances": [ "Mozilla/5.0 (compatible; botify; http://botify.com)" ] } , { "pattern": "007ac9 Crawler", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; 007ac9 Crawler; http://crawler.007ac9.net/)" ] } , { "pattern": "BehloolBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; BehloolBot/beta; +http://www.webeaver.com/bot)" ] } , { "pattern": "BrandVerity", "addition_date": "2018/02/27", "instances": [ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/55.0 BrandVerity/1.0 (http://www.brandverity.com/why-is-brandverity-visiting-me)" ] } , { "pattern": "check_http", "addition_date": "2018/02/09", "instances": [ "check_http/v2.2.1 (nagios-plugins 2.2.1)" ] } , { "pattern": "BDCbot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (Windows NT 6.1; compatible; BDCbot/1.0; +http://bigweb.bigdatacorp.com.br/faq.aspx) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36" ] } , { "pattern": "ZumBot", "addition_date": "2018/02/09", "instances": [ "Mozilla/5.0 (compatible; ZumBot/1.0; http://help.zum.com/inquiry)" ] } , { "pattern": "EZID", "addition_date": "2018/02/09", "instances": [ "EZID (EZID link checker; https://ezid.cdlib.org/)" ] } , { "pattern": "ICC-Crawler", "addition_date": "2018/02/28", "instances": [ "ICC-Crawler/2.0 (Mozilla-compatible; ; http://ucri.nict.go.jp/en/icccrawler.html)" ], "url": "http://ucri.nict.go.jp/en/icccrawler.html" } , { "pattern": "ArchiveBot", "addition_date": "2018/02/28", "instances": [ "ArchiveTeam ArchiveBot/20170106.02 (wpull 2.0.2)" ], "url": "https://github.com/ArchiveTeam/ArchiveBot" } , { "pattern": "^LCC ", "addition_date": "2018/02/28", "instances": [ "LCC (+http://corpora.informatik.uni-leipzig.de/crawler_faq.html)" ], "url": "http://corpora.informatik.uni-leipzig.de/crawler_faq.html" } , { "pattern": "filterdb.iss.net\\/crawler", "addition_date": "2018/03/16", "instances": [ "Mozilla/5.0 (compatible; oBot/2.3.1; +http://filterdb.iss.net/crawler/)" ], "url": "http://filterdb.iss.net/crawler/" } , { "pattern": "BLP_bbot", "addition_date": "2018/03/27", "instances": [ "BLP_bbot/0.1" ] } , { "pattern": "BomboraBot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; BomboraBot/1.0; +http://www.bombora.com/bot)" ], "url": "http://www.bombora.com/bot" } , { "pattern": "Buck\\/", "addition_date": "2018/03/27", "instances": [ "Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html)" ], "url": "https://app.hypefactors.com/media-monitoring/about.html" } , { "pattern": "Companybook-Crawler", "addition_date": "2018/03/27", "instances": [ "Companybook-Crawler (+https://www.companybooknetworking.com/)" ], "url": "https://www.companybooknetworking.com/" } , { "pattern": "Genieo", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; Genieo/1.0 http://www.genieo.com/webfilter.html)" ], "url": "http://www.genieo.com/webfilter.html" } , { "pattern": "magpie-crawler", "addition_date": "2018/03/27", "instances": [ "magpie-crawler/1.1 (U; Linux amd64; en-GB; +http://www.brandwatch.net)" ], "url": "http://www.brandwatch.net" } , { "pattern": "MeltwaterNews", "addition_date": "2018/03/27", "instances": [ "MeltwaterNews www.meltwater.com" ], "url": "http://www.meltwater.com" } , { "pattern": "Moreover", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 Moreover/5.1 (+http://www.moreover.com)" ], "url": "http://www.moreover.com" } , { "pattern": "newspaper\\/", "addition_date": "2018/03/27", "instances": [ "newspaper/0.2.5", "newspaper/0.2.6", "newspaper/0.1.0.7" ] } , { "pattern": "ScoutJet", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)" ], "url": "http://www.scoutjet.com/" } , { "pattern": "(^| )sentry\\/", "addition_date": "2018/03/27", "instances": [ "sentry/8.22.0 (https://sentry.io)" ], "url": "https://sentry.io" } , { "pattern": "StorygizeBot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0 (compatible; StorygizeBot; http://www.storygize.com)" ], "url": "http://www.storygize.com" } , { "pattern": "UptimeRobot", "addition_date": "2018/03/27", "instances": [ "Mozilla/5.0+(compatible; UptimeRobot/2.0; http://www.uptimerobot.com/)" ], "url": "http://www.uptimerobot.com/" } , { "pattern": "OutclicksBot", "addition_date": "2018/04/21", "instances": [ "OutclicksBot/2 +https://www.outclicks.net/agent/VjzDygCuk4ubNmg40ZMbFqT0sIh7UfOKk8s8ZMiupUR", "OutclicksBot/2 +https://www.outclicks.net/agent/gIYbZ38dfAuhZkrFVl7sJBFOUhOVct6J1SvxgmBZgCe", "OutclicksBot/2 +https://www.outclicks.net/agent/PryJzTl8POCRHfvEUlRN5FKtZoWDQOBEvFJ2wh6KH5J", "OutclicksBot/2 +https://www.outclicks.net/agent/p2i4sNUh7eylJF1S6SGgRs5mP40ExlYvsr9GBxVQG6h" ], "url": "https://www.outclicks.net" } , { "pattern": "seoscanners", "addition_date": "2018/05/27", "instances": [ "Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)" ], "url": "http://www.seoscanners.net/" } , { "pattern": "Hatena", "addition_date": "2018/05/29", "instances": [ "Hatena Antenna/0.3", "Hatena::Russia::Crawler/0.01" ] } , { "pattern": "Google Web Preview", "addition_date": "2018/05/31", "instances": [ "Mozilla/5.0 (Linux; U; Android 2.3.4; generic) AppleWebKit/537.36 (KHTML, like Gecko; Google Web Preview) Version/4.0 Mobile Safari/537.36" ] } , { "pattern": "MauiBot", "addition_date": "2018/06/06", "instances": [ "MauiBot (crawler.feedback+wc@gmail.com)" ] } , { "pattern": "AlphaBot", "addition_date": "2018/05/27", "instances": [ "Mozilla/5.0 (compatible; AlphaBot/3.2; +http://alphaseobot.com/bot.html)" ], "url": "http://alphaseobot.com/bot.html" } , { "pattern": "SBL-BOT", "addition_date": "2018/06/06", "instances": [ "SBL-BOT (http://sbl.net)" ], "url": "http://sbl.net", "description" : "Bot of SoftByte BlackWidow" } , { "pattern": "IAS crawler", "addition_date": "2018/06/06", "instances": [ "IAS crawler (ias_crawler; http://integralads.com/site-indexing-policy/)" ], "url": "http://integralads.com/site-indexing-policy/", "description" : "Bot of Integral Ad Science, Inc." } , { "pattern": "adscanner", "addition_date": "2018/06/24", "instances": [ "Mozilla/5.0 (compatible; adscanner/)" ] } , { "pattern": "Netvibes", "addition_date": "2018/06/24", "instances": [ "Netvibes (crawler/bot; http://www.netvibes.com" ], "url": "http://www.netvibes.com" } , { "pattern": "acapbot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible;acapbot/0.1;treat like Googlebot)", "Mozilla/5.0 (compatible;acapbot/0.1.;treat like Googlebot)" ] } , { "pattern": "Baidu-YunGuanCe", "addition_date": "2018/06/27", "instances": [ "Baidu-YunGuanCe-Bot(ce.baidu.com)", "Baidu-YunGuanCe-SLABot(ce.baidu.com)", "Baidu-YunGuanCe-ScanBot(ce.baidu.com)", "Baidu-YunGuanCe-PerfBot(ce.baidu.com)", "Baidu-YunGuanCe-VSBot(ce.baidu.com)" ], "url": "https://ce.baidu.com/topic/topic20150908", "description": "Baidu Cloud Watch" } , { "pattern": "bitlybot", "addition_date": "2018/06/27", "instances": [ "bitlybot/3.0 (+http://bit.ly/)", "bitlybot/2.0", "bitlybot" ], "url": "http://bit.ly/" } , { "pattern": "blogmuraBot", "addition_date": "2018/06/27", "instances": [ "blogmuraBot (+http://www.blogmura.com)" ], "url": "http://www.blogmura.com", "description": "A blog ranking site which links to blogs on just about every theme possible." } , { "pattern": "Bot.AraTurka.com", "addition_date": "2018/06/27", "instances": [ "Bot.AraTurka.com/0.0.1" ], "url": "http://www.araturka.com" } , { "pattern": "bot-pge.chlooe.com", "addition_date": "2018/06/27", "instances": [ "bot-pge.chlooe.com/1.0.0 (+http://www.chlooe.com/)" ] } , { "pattern": "BoxcarBot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; BoxcarBot/1.1; +awesome@boxcar.io)" ], "url": "https://boxcar.io/" } , { "pattern": "BTWebClient", "addition_date": "2018/06/27", "instances": [ "BTWebClient/180B(9704)" ], "url": "http://www.utorrent.com/", "description": "µTorrent BitTorrent Client" } , { "pattern": "ContextAd Bot", "addition_date": "2018/06/27", "instances": [ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0;.NET CLR 1.0.3705; ContextAd Bot 1.0)", "ContextAd Bot 1.0" ] } , { "pattern": "Digincore bot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; Digincore bot; https://www.digincore.com/crawler.html for rules and instructions.)" ], "url": "http://www.digincore.com/crawler.html" } , { "pattern": "Disqus", "addition_date": "2018/06/27", "instances": [ "Disqus/1.0" ], "url": "https://disqus.com/", "description": "validate and quality check pages." } , { "pattern": "Feedly", "addition_date": "2018/06/27", "instances": [ "Feedly/1.0 (+http://www.feedly.com/fetcher.html; like FeedFetcher-Google)", "FeedlyBot/1.0 (http://feedly.com)" ], "url": "https://www.feedly.com/fetcher.html", "description": "Feedly Fetcher is how Feedly grabs RSS or Atom feeds when users choose to add them to their Feedly or any of the other applications built on top of the feedly cloud." } , { "pattern": "Fetch\\/", "addition_date": "2018/06/27", "instances": [ "Fetch/2.0a (CMS Detection/Web/SEO analysis tool, see http://guess.scritch.org)" ] } , { "pattern": "Fever", "addition_date": "2018/06/27", "instances": [ "Fever/1.38 (Feed Parser; http://feedafever.com; Allow like Gecko)" ], "url": "http://feedafever.com" } , { "pattern": "Flamingo_SearchEngine", "addition_date": "2018/06/27", "instances": [ "Flamingo_SearchEngine (+http://www.flamingosearch.com/bot)" ] } , { "pattern": "FlipboardProxy", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; FlipboardProxy/1.1; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:28.0) Gecko/20100101 Firefox/28.0 (FlipboardProxy/1.1; +http://flipboard.com/browserproxy)" ], "url": "https://about.flipboard.com/browserproxy/", "description": "a proxy service to fetch, validate, and prepare certain elements of websites for presentation through the Flipboard Application" } , { "pattern": "g2reader-bot", "addition_date": "2018/06/27", "instances": [ "g2reader-bot/1.0 (+http://www.g2reader.com/)" ], "url": "http://www.g2reader.com/" } , { "pattern": "imrbot", "addition_date": "2018/06/27", "instances": [ "Mozilla/5.0 (compatible; imrbot/1.10.8 +http://www.mignify.com)" ], "url": "http://www.mignify.com" } , { "pattern": "K7MLWCBot", "addition_date": "2018/06/27", "instances": [ "K7MLWCBot/1.0 (+http://www.k7computing.com)" ], "url": "http://www.k7computing.com", "description": "Virus scanner" } , { "pattern": "Kemvibot", "addition_date": "2018/06/27", "instances": [ "Kemvibot/1.0 (http://kemvi.com, marco@kemvi.com)" ], "url": "http://kemvi.com" } , { "pattern": "Landau-Media-Spider", "addition_date": "2018/06/27", "instances": [ "Landau-Media-Spider/1.0(http://bots.landaumedia.de/bot.html)" ], "url": "http://bots.landaumedia.de/bot.html" } , { "pattern": "linkapediabot", "addition_date": "2018/06/27", "instances": [ "linkapediabot (+http://www.linkapedia.com)" ], "url": "http://www.linkapedia.com" } , { "pattern": "vkShare", "addition_date": "2018/07/02", "instances": [ "Mozilla/5.0 (compatible; vkShare; +http://vk.com/dev/Share)" ], "url": "http://vk.com/dev/Share" } , { "pattern": "Siteimprove.com", "addition_date": "2018/06/22", "instances": [ "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) LinkCheck by Siteimprove.com", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) Match by Siteimprove.com", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0) SiteCheck-sitecrawl by Siteimprove.com", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.0) LinkCheck by Siteimprove.com" ] } , { "pattern": "BLEXBot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)" ], "url": "http://webmeup-crawler.com" } , { "pattern": "DareBoost", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36 DareBoost" ], "url": "https://www.dareboost.com/", "description": "Bot to test, Analyze and Optimize website" } , { "pattern": "ZuperlistBot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; ZuperlistBot/1.0)" ] } , { "pattern": "Miniflux\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; Miniflux/2.0.7; +https://miniflux.net)" ], "url": "https://miniflux.net", "description": "Miniflux is a minimalist and opinionated feed reader." } , { "pattern": "Feedspotbot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (compatible; Feedspotbot/1.0; +http://www.feedspot.com/fs/bot)" ], "url": "http://www.feedspot.com/fs/bot" } , { "pattern": "Diffbot\\/", "addition_date": "2018/07/07", "instances": [ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)" ], "url": "http://www.diffbot.com" } , { "pattern": "SEOkicks", "addition_date": "2018/08/22", "instances": [ "Mozilla/5.0 (compatible; SEOkicks; +https://www.seokicks.de/robot.html)" ], "url": "https://www.seokicks.de/robot.html" } , { "pattern": "tracemyfile", "addition_date": "2018/08/23", "instances": [ "Mozilla/5.0 (compatible; tracemyfile/1.0; +bot@tracemyfile.com)" ] } , { "pattern": "Nimbostratus-Bot", "addition_date": "2018/08/29", "instances": [ "Mozilla/5.0 (compatible; Nimbostratus-Bot/v1.3.2; http://cloudsystemnetworks.com)" ] } , { "pattern": "zgrab", "addition_date": "2018/08/30", "instances": [ "Mozilla/5.0 zgrab/0.x" ], "url": "https://zmap.io/" } , { "pattern": "PR-CY.RU", "addition_date": "2018/08/30", "instances": [ "Mozilla/5.0 (compatible; PR-CY.RU; + https://a.pr-cy.ru)" ], "url": "https://a.pr-cy.ru/" } , { "pattern": "AdsTxtCrawler", "addition_date": "2018/08/30", "instances": [ "AdsTxtCrawler/1.0" ] }, { "pattern": "Datafeedwatch", "addition_date": "2018/09/05", "instances": [ "Datafeedwatch/2.1.x" ], "url": "https://www.datafeedwatch.com/" } , { "pattern": "Zabbix", "addition_date": "2018/09/05", "instances": [ "Zabbix" ], "url": "https://www.zabbix.com/documentation/3.4/manual/web_monitoring" } , { "pattern": "TangibleeBot", "addition_date": "2018/09/05", "instances": [ "TangibleeBot/1.0.0.0 (http://tangiblee.com/bot)" ], "url": "http://tangiblee.com/bot" } , { "pattern": "google-xrawler", "addition_date": "2018/09/05", "instances": [ "google-xrawler" ], "url": "https://webmasters.stackexchange.com/questions/105560/what-is-the-google-xrawler-user-agent-used-for" } , { "pattern": "axios", "addition_date": "2018/09/06", "instances": [ "axios/0.18.0" ], "url": "https://github.com/axios/axios" } , { "pattern": "Amazon CloudFront", "addition_date": "2018/09/07", "instances": [ "Amazon CloudFront" ], "url": "https://aws.amazon.com/cloudfront/" } , { "pattern": "Pulsepoint", "addition_date": "2018/09/24", "instances": [ "Pulsepoint XT3 web scraper" ] } , { "pattern": "CloudFlare-AlwaysOnline", "addition_date": "2018/09/27", "instances": [ "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +http://www.cloudflare.com/always-online) AppleWebKit/534.34", "Mozilla/5.0 (compatible; CloudFlare-AlwaysOnline/1.0; +https://www.cloudflare.com/always-online) AppleWebKit/534.34" ], "url" : "https://www.cloudflare.com/always-online/" } ] biola-Voight-Kampff-a46158d/lib/000077500000000000000000000000001440337637500163605ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/lib/tasks/000077500000000000000000000000001440337637500175055ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/lib/tasks/voight_kampff.rake000066400000000000000000000011221440337637500231710ustar00rootroot00000000000000namespace :voight_kampff do desc 'Import a new crawler-user-agents.json file' task :import_user_agents, :url do |t, args| args.with_defaults url: 'https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json' require 'net/http' uri = URI(args[:url]) contents = Net::HTTP.get(uri) if contents.present? file = File.open('./config/crawler-user-agents.json', 'w') file.write(contents.force_encoding(Encoding::UTF_8)) else puts "voight_kampff:import_user_agents - empty file received from #{uri}" end end end biola-Voight-Kampff-a46158d/lib/voight_kampff.rb000066400000000000000000000010111440337637500215220ustar00rootroot00000000000000require 'json' require 'voight_kampff/test' require 'voight_kampff/methods' module VoightKampff class << self def root require 'pathname' Pathname.new File.expand_path '..', File.dirname(__FILE__) end def human?(user_agent_string) test(user_agent_string).human? end def bot?(user_agent_string) test(user_agent_string).bot? end alias :replicant? :bot? private def test(user_agent_string) VoightKampff::Test.new(user_agent_string) end end end biola-Voight-Kampff-a46158d/lib/voight_kampff/000077500000000000000000000000001440337637500212045ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/lib/voight_kampff/engine.rb000066400000000000000000000004261440337637500230000ustar00rootroot00000000000000module VoightKampff class Engine < Rails::Engine rake_tasks do load 'tasks/voight_kampff.rake' end initializer :add_voight_kampff_methods do |app| ActionDispatch::Request.class_eval do include VoightKampff::Methods end end end end biola-Voight-Kampff-a46158d/lib/voight_kampff/methods.rb000066400000000000000000000002721440337637500231750ustar00rootroot00000000000000module VoightKampff::Methods def human? VoightKampff::Test.new(user_agent).human? end def bot? VoightKampff::Test.new(user_agent).bot? end alias :replicant? :bot? end biola-Voight-Kampff-a46158d/lib/voight_kampff/rack.rb000066400000000000000000000000751440337637500224530ustar00rootroot00000000000000require 'voight_kampff' require 'voight_kampff/rack_request' biola-Voight-Kampff-a46158d/lib/voight_kampff/rack_request.rb000066400000000000000000000001761440337637500242250ustar00rootroot00000000000000# Reopen the Rack::Request class to add bot detection methods Rack::Request.class_eval do include VoightKampff::Methods end biola-Voight-Kampff-a46158d/lib/voight_kampff/rails.rb000066400000000000000000000001241440337637500226400ustar00rootroot00000000000000require 'voight_kampff' require 'voight_kampff/rack' require 'voight_kampff/engine' biola-Voight-Kampff-a46158d/lib/voight_kampff/test.rb000066400000000000000000000026131440337637500225120ustar00rootroot00000000000000module VoightKampff class Test CRAWLERS_FILENAME = 'crawler-user-agents.json' attr_accessor :user_agent_string def initialize(user_agent_string) @user_agent_string = user_agent_string end def agent @agent ||= matching_crawler || {} end def human? agent.empty? end def bot? !human? end alias :replicant? :bot? private def lookup_paths # These paths should be orderd by priority base_paths = [] base_paths << Rails.root if defined? Rails base_paths << VoightKampff.root base_paths.map { |p| p.join('config', CRAWLERS_FILENAME) } end def preferred_path lookup_paths.find { |path| File.exist? path } end def matching_crawler if match = crawler_regexp.match(@user_agent_string) index = match.names.first.sub(/match/, '').to_i crawlers[index] end end def crawler_regexp @@crawler_regexp ||= begin # NOTE: This is admittedly a bit convoluted but the performance gains make it worthwhile index = -1 crawler_patterns = crawlers.map{|c| index += 1; "(?#{c["pattern"]})" }.join("|") crawler_patterns = "(#{crawler_patterns})" Regexp.new(crawler_patterns, Regexp::IGNORECASE) end end def crawlers @@crawlers ||= JSON.load(File.open(preferred_path, 'r')) end end end biola-Voight-Kampff-a46158d/lib/voight_kampff/version.rb000066400000000000000000000001131440337637500232110ustar00rootroot00000000000000# frozen_string_literal: true module VoightKampff VERSION = '2.0.0' end biola-Voight-Kampff-a46158d/spec/000077500000000000000000000000001440337637500165445ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/controllers/000077500000000000000000000000001440337637500211125ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/controllers/replicants_controller_spec.rb000066400000000000000000000014571440337637500270670ustar00rootroot00000000000000require 'spec_helper' describe ReplicantsController, type: :controller do let(:user_agent_string) { '' } before do expect_any_instance_of(ActionController::TestRequest).to receive(:user_agent).and_return user_agent_string get :index end HUMANS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is forbidden' do expect(response.status).to eql 403 expect(response.body).to match(/No replicants here/) end end end REPLICANTS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is successful' do expect(response.status).to eql 200 expect(response.body).to match(/Rick Deckard/) end end end end biola-Voight-Kampff-a46158d/spec/internal/000077500000000000000000000000001440337637500203605ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/app/000077500000000000000000000000001440337637500211405ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/app/controllers/000077500000000000000000000000001440337637500235065ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/app/controllers/replicants_controller.rb000066400000000000000000000004401440337637500304400ustar00rootroot00000000000000class ReplicantsController < ActionController::Base def index header = "Replicants:\n===========\n" status, content = if request.bot? [200, '- Rick Deckard'] else [403, 'No replicants here'] end render plain: header + content, status: status end end biola-Voight-Kampff-a46158d/spec/internal/config/000077500000000000000000000000001440337637500216255ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/config/routes.rb000066400000000000000000000001511440337637500234700ustar00rootroot00000000000000Rails.application.routes.draw do resources :replicants, only: :index root to: 'replicants#index' end biola-Voight-Kampff-a46158d/spec/internal/log/000077500000000000000000000000001440337637500211415ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/log/.gitignore000066400000000000000000000000051440337637500231240ustar00rootroot00000000000000*.logbiola-Voight-Kampff-a46158d/spec/internal/public/000077500000000000000000000000001440337637500216365ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/internal/public/favicon.ico000066400000000000000000000000001440337637500237450ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/lib/000077500000000000000000000000001440337637500173125ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/lib/voight_kampff/000077500000000000000000000000001440337637500221365ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/lib/voight_kampff/rack_request_spec.rb000066400000000000000000000015271440337637500261720ustar00rootroot00000000000000require 'spec_helper' describe Rack::Request do let(:user_agent_string) { } let(:env) { {'HTTP_USER_AGENT' => user_agent_string} } subject { Rack::Request.new(env) } it { expect(subject).to respond_to :human? } it { expect(subject).to respond_to :bot? } it { expect(subject).to respond_to :replicant? } HUMANS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is not a replicant' do expect(subject.human?).to eql true expect(subject.bot?).to eql false end end end REPLICANTS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is a replicant' do expect(subject.bot?).to eql true expect(subject.human?).to eql false end end end end biola-Voight-Kampff-a46158d/spec/lib/voight_kampff/test_spec.rb000066400000000000000000000016611440337637500244600ustar00rootroot00000000000000require 'spec_helper' describe VoightKampff::Test do let(:user_agent_string) { nil } subject { VoightKampff::Test.new(user_agent_string) } HUMANS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is not a replicant' do expect(subject.human?).to be true expect(subject.bot?).to be false end end end REPLICANTS.each do |name, ua_string| context "when user agent is #{name}" do let(:user_agent_string) { ua_string } it 'is a replicant' do expect(subject.bot?).to be true expect(subject.human?).to be false end end end context 'after the first run' do before { VoightKampff::Test.new('anything').bot? } it 'is fast' do expect( Benchmark.realtime do 20.times { VoightKampff::Test.new('anything').bot? } end ).to be < 0.005 end end end biola-Voight-Kampff-a46158d/spec/lib/voight_kampff_spec.rb000066400000000000000000000012671440337637500235030ustar00rootroot00000000000000require 'spec_helper' describe VoightKampff do subject { VoightKampff } HUMANS.each do |name, ua_string| context "when user agent is #{ua_string}" do let(:user_agent_string) { ua_string } it 'is not a replicant' do expect(subject.human?(user_agent_string)).to be true expect(subject.bot?(user_agent_string)).to be false end end end REPLICANTS.each do |name, ua_string| context "when user agent is #{ua_string}" do let(:user_agent_string) { ua_string } it 'is a replicant' do expect(subject.bot?(user_agent_string)).to be true expect(subject.human?(user_agent_string)).to be false end end end end biola-Voight-Kampff-a46158d/spec/spec_helper.rb000066400000000000000000000003731440337637500213650ustar00rootroot00000000000000require 'bundler/setup' require 'combustion' require 'voight_kampff' require 'voight_kampff/rails' Combustion.initialize! :action_controller require 'rspec/rails' Dir['./spec/support/**/*.rb'].each { |f| require f } RSpec.configure do |config| end biola-Voight-Kampff-a46158d/spec/support/000077500000000000000000000000001440337637500202605ustar00rootroot00000000000000biola-Voight-Kampff-a46158d/spec/support/humans.rb000066400000000000000000000015451440337637500221050ustar00rootroot00000000000000HUMANS = { 'Unknown' => nil, # for the moment we're treating a blank user agent string as not a bot 'Chrome' => 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36', 'Firefox' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1', 'Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A', 'Internet Explorer' => 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko', 'Chrome Mobile' => 'Mozilla/5.0 (Linux; Android 4.0.4; Galaxy Nexus Build/IMM76B) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.133 Mobile Safari/535.19', 'Safari for iOS' => 'Mozilla/5.0 (iPad; CPU OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53' } biola-Voight-Kampff-a46158d/spec/support/replicants.rb000066400000000000000000000004621440337637500227530ustar00rootroot00000000000000REPLICANTS = { 'Googlebot' => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 'Bingbot' => 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 'Yahoo! Slurp' => 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)' } biola-Voight-Kampff-a46158d/voight_kampff.gemspec000066400000000000000000000017361440337637500220120ustar00rootroot00000000000000# -*- encoding: utf-8 -*- $:.unshift File.expand_path('../lib', __FILE__) require 'voight_kampff/version' Gem::Specification.new do |s| s.name = 'voight_kampff' s.summary = "Voight-Kampff bot detection" s.description = 'Voight-Kampff detects bots, spiders, crawlers and replicants' s.licenses = ['MIT'] s.author = "Adam Crownoble" s.email = "adam@codenoble.com" s.homepage = "https://github.com/biola/Voight-Kampff" # so that rubygems does not uses the actual object s.version = VoightKampff::VERSION.dup s.platform = Gem::Platform::RUBY.dup s.files = `git ls-files`.split("\n") s.files.reject! { |fn| fn.match(/\.travis.yml/) } s.test_files = `git ls-files -- {tests}/**/*`.split("\n") s.require_path = 'lib' s.add_dependency 'rack', ['>= 1.4'] s.add_development_dependency 'combustion', '~> 1.1' s.add_development_dependency 'rails', '>= 5.2' s.add_development_dependency 'rspec-rails', '~> 3.8' end