RewriteEngine – Blocking Robots v3




Tagged Under : , , , ,

Version: 41.1s
Revision: 141 Build 46

RewriteEngine – Blocking Robots v3

Introduction:
this powerful ‘htaccess’ script was designed and tested for 2 months.
It was a successful project and it’s free for use! However, you must have “RewriteEngine” enabled for the process to work!

1.] Download notepad++ from the original author or from a mirror and install the software.
——————————-

http://sourceforge.net/projects/notepad-plus/

http://filehippo.com/download_notepad/

——————————-

2.] Copy this “ANSI – script” and save it as a “.htaccess” extension.

Notes: if you want to cancel any robots or harvester from being block, you’ll need to add the comment symbol “#” (without the quotes).

Advice: To activate the script, please unmark the comment symbol (#) from line two and three. That is all…

—Copy Source Code—

# -blockBots-
Options +FollowSymlinks
RewriteEngine On
RewriteBase /
RewriteCond %{HTTP_USER_AGENT} ^web.?(auto|bandit|collector|copier|devil|hook|mole|miner|mirror|reaper|sauger|sucker|site|snake|stripper|weasel) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Alphared [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Bot\ mailto:craftbot@yahoo.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^SapphireWebCrawler\ Nuch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^ChinaClaw [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Custo [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^AsiaNetBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^RealDownload [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^DISCo [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Download\ Demon [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^DIIbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^eCatch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EirGrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailWolf [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailCollector [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Express\ WebPictures [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EyeNetIE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^FlashGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^GetRight [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^GetWeb! [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Go-Ahead-Got-It [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^GrabNet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Grafula [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^PERL/? [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^HMView [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Indy.Library [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Stripper [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Sucker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Indy\ Library [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^InterGET [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Internet\ Ninja [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^InternetSeer.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JetCar [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JOC\ Web\ Spider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^larbin [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^LeechFTP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Mass\ Downloader [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^MIDown\ tool [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Mister\ PiX [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Navroad [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NearSite [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NetAnts [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NetSpider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^NetZIP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Octopus [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Explorer [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Navigator [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^PageGrabber [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Papa\ Foto [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^pavuk [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^RADIAN6.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^R6_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^pcBrowser [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^RealDownload [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^ReGet [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^SiteSnagger [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^SmartDownload [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperHTTP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Surfbot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^tAkeOut [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport\ Pro [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^VoidEYE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Image\ Collector [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Sucker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebAuto [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebCopier [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebFetch [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebGo\ IS [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebLeacher [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebReaper [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebSauger [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WIKIOFEED\ wikio.com [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JAVA [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Java/1.4.1_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JAVA/1.6.0_ [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^JAVA/1\.* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ eXtractor [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ Quester [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebStripper [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebWhacker [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebZIP [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Wget [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WhosTalking [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Widow [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEnhancer [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^vayala [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^EATRSSFEEDHUNGRYSPIDER\* [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^TurnitinBot [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^psbot [NC]
RewriteRule ^.* - [F,L]

—End Source Code—

3.] Remove the old file from your server and upload your new version!

4.] Done!

Notes: if you want to download the files, you can use this link instead: http://www.2shared.com/file/5082668/bfa5c0cc/Blocking_Robots.html

Copyrighted by Lair360




Blocking unwanted robots or bad robots




Tagged Under : , ,

Version: 36.2
Revision: 36 Build 45

How to block unwanted robots or bad robots

Introduction: this tutorial will help you to stop unwanted robots that could tamper your website’s bandwidth usage, hotlinked your images and spam your website with force.
You can stop this problem if you have ‘htaccess’ configured or enabled. However, this will only work on servers that have Linux OS…

1.] Download notepad++ from the original author or from a mirror and install the software.
——————————-

http://sourceforge.net/projects/notepad-plus/

http://filehippo.com/download_notepad/

——————————-

Tutorial *1: before you start to block bad robots, you must know these little symbols and rules.
This is crucial if you want to block them without a mistake…

Codes and symbols…
——————————-
NC – means to ignore upper and lower case.
Notes: This code is sometimes placed after an entry to scan it with or without concern to upper or lower case.

OR – means “Or Next”, as in, match this domain or the next line that follows.
Notes: This code is placed after each of the multiple entries, except the last!
——————————-
^ – identifies the beginning of the user agent string.
$ – identifies the end of the user agent string.
\ – that is a slash with a space afterwards tells the parser to include the space between words.
? – this is a question mark that tells the engine if it has any other character included.

Example: If I have a “.jpeg” extension, I’ll put “.jp?g” as an alternative.
This will notify the engine to include jpeg and jpg to the blocking list.

——————————-

Tutorial *2: This is a small list of robots for blocking. However, they might be invalid or useless…

—Copy Source Code—

RewriteEngine On
RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus
RewriteRule ^.* - [F,L]

---End Source Code---

Important: the last line of “RewriteCond” shouldn’t have an [OR] after the robot’s name.

Here is a version (after the example) which I had created for you to block. This will reduce your time and research to block bad robots that is eating up your bandwidth or spamming your website. But, if you want to unmark one of the robots you can use # with a space at the beginning of “RewriteCond” to unmark a robot or multiple robots from the block list.

Here is an example…
-----------------------------

RewriteEngine On
# RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [OR]
# RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [OR]
# RewriteCond %{HTTP_USER_AGENT} ^Zeus
RewriteRule ^.* - [F,L]

-----------------------------

---Copy Source Code---

# User_Agent Version 2.35 [Final]
# Revision: 65 Build 125
# Author: Lair360

RewriteEngine On
RewriteCond %{HTTP_USER_AGENT} ^Alphared [OR]
RewriteCond %{HTTP_USER_AGENT} ^BlackWidow [OR]
RewriteCond %{HTTP_USER_AGENT} ^Bot\ mailto:craftbot@yahoo.com [OR]
RewriteCond %{HTTP_USER_AGENT} ^ChinaClaw [OR]
RewriteCond %{HTTP_USER_AGENT} ^Custo [OR]
RewriteCond %{HTTP_USER_AGENT} ^DISCo [OR]
RewriteCond %{HTTP_USER_AGENT} ^Download\ Demon [OR]
RewriteCond %{HTTP_USER_AGENT} ^DIIbot [OR]
RewriteCond %{HTTP_USER_AGENT} ^eCatch [OR]
RewriteCond %{HTTP_USER_AGENT} ^EirGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailSiphon [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailWolf [OR]
RewriteCond %{HTTP_USER_AGENT} ^EmailCollector [OR]
RewriteCond %{HTTP_USER_AGENT} ^Express\ WebPictures [OR]
RewriteCond %{HTTP_USER_AGENT} ^ExtractorPro [OR]
RewriteCond %{HTTP_USER_AGENT} ^EyeNetIE [OR]
RewriteCond %{HTTP_USER_AGENT} ^FlashGet [OR]
RewriteCond %{HTTP_USER_AGENT} ^GetRight [OR]
RewriteCond %{HTTP_USER_AGENT} ^GetWeb! [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go!Zilla [OR]
RewriteCond %{HTTP_USER_AGENT} ^Go-Ahead-Got-It [OR]
RewriteCond %{HTTP_USER_AGENT} ^GrabNet [OR]
RewriteCond %{HTTP_USER_AGENT} ^Grafula [OR]
RewriteCond %{HTTP_USER_AGENT} ^Gecko [OR]
RewriteCond %{HTTP_USER_AGENT} ^HMView [OR]
RewriteCond %{HTTP_USER_AGENT} ^Indy.Library [OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Stripper [OR]
RewriteCond %{HTTP_USER_AGENT} ^Image\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} ^Indy\ Library [OR]
RewriteCond %{HTTP_USER_AGENT} ^InterGET [OR]
RewriteCond %{HTTP_USER_AGENT} ^Internet\ Ninja [OR]
RewriteCond %{HTTP_USER_AGENT} ^InternetSeer.com [OR]
RewriteCond %{HTTP_USER_AGENT} ^JetCar [OR]
RewriteCond %{HTTP_USER_AGENT} ^JOC\ Web\ Spider [OR]
RewriteCond %{HTTP_USER_AGENT} ^larbin [OR]
RewriteCond %{HTTP_USER_AGENT} ^LeechFTP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mass\ Downloader [OR]
RewriteCond %{HTTP_USER_AGENT} ^MIDown\ tool [OR]
RewriteCond %{HTTP_USER_AGENT} ^Mister\ PiX [OR]
RewriteCond %{HTTP_USER_AGENT} ^Navroad [OR]
RewriteCond %{HTTP_USER_AGENT} ^NearSite [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetAnts [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetSpider [OR]
RewriteCond %{HTTP_USER_AGENT} ^Net\ Vampire [OR]
RewriteCond %{HTTP_USER_AGENT} ^NetZIP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Octopus [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Explorer [OR]
RewriteCond %{HTTP_USER_AGENT} ^Offline\ Navigator [OR]
RewriteCond %{HTTP_USER_AGENT} ^SearchExpress [OR]
RewriteCond %{HTTP_USER_AGENT} ^PageGrabber [OR]
RewriteCond %{HTTP_USER_AGENT} ^Papa\ Foto [OR]
RewriteCond %{HTTP_USER_AGENT} ^pavuk [OR]
RewriteCond %{HTTP_USER_AGENT} ^pcBrowser [OR]
RewriteCond %{HTTP_USER_AGENT} ^RealDownload [OR]
RewriteCond %{HTTP_USER_AGENT} ^ReGet [OR]
RewriteCond %{HTTP_USER_AGENT} ^SiteSnagger [OR]
RewriteCond %{HTTP_USER_AGENT} ^(site.)?quester [OR]
RewriteCond %{HTTP_USER_AGENT} ^SmartDownload [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperBot [OR]
RewriteCond %{HTTP_USER_AGENT} ^SuperHTTP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Surfbot [OR]
RewriteCond %{HTTP_USER_AGENT} ^tAkeOut [OR]
RewriteCond %{HTTP_USER_AGENT} ^Teleport\ Pro [OR]
RewriteCond %{HTTP_USER_AGENT} ^VoidEYE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Sucker [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebAuto [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebCopier [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebFetch [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebGo\ IS [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebLeacher [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebReaper [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebSauger [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ eXtractor [OR]
RewriteCond %{HTTP_USER_AGENT} ^Website\ Quester [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebStripper [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebWhacker [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebZIP [OR]
RewriteCond %{HTTP_USER_AGENT} ^Wget [OR]
RewriteCond %{HTTP_USER_AGENT} ^WhosTalking [OR]
RewriteCond %{HTTP_USER_AGENT} ^Widow [OR]
RewriteCond %{HTTP_USER_AGENT} ^WebEnhancer [OR]
RewriteCond %{HTTP_USER_AGENT} ^WWWOFFLE [OR]
RewriteCond %{HTTP_USER_AGENT} ^Xaldon\ WebSpider [OR]
RewriteCond %{HTTP_USER_AGENT} ^Zeus [OR]
RewriteCond %{HTTP_USER_AGENT} ^Web\ Image\ Collector
RewriteRule ^.* - [F,L]

---End Source Code---

Notes: If you’re using this “htaccess” source code, you’ll need to wait for about 2 days or 24 hrs for the codes to take effect.
If you try to check the “htaccess” blocking system, you’ll not see any changes. It will take time for the system to block these nasty robots!