robots.txt details

89df016497349dd5504409990e693fb4

saved: | size: 5632kb | checksum: 89df016497349dd5504409990e693fb4

websites using this robots.txt

content


#
# robots.txt
#
# Please note: There are a lot of pages on this site, and there are
# some misbehaving spiders out there. If you crawl irresponsibly,
# your access to the site may be blocked.
#
# Layout of this file:
#   1. One record per banned crawler ("Disallow: /" = the whole site).
#   2. Exactly one "User-agent: *" record, placed last, holding the
#      default rules for every crawler not named above.
#      Keep it single and last: some parsers honour only the first "*"
#      record they encounter, and parsers that stop at the first
#      matching record would otherwise apply "*" instead of a crawler's
#      own ban.
#
# The tracking tags in the comments (xxqx, 11q4, proof, open, 403, host
# names) are carried over from the original notes; "open" translates
# the original German "offen".
#

User-agent: ia_archiver
Disallow: /

User-agent: LinkChecker
Disallow: /

User-agent: psbot
Disallow: /

User-agent: WebReaper
Disallow: /

# xxqx proof 11/q4 http://www.scoutjet.com/ (maputo02)
User-agent: ScoutJet
Disallow: /

User-agent: wget
Disallow: /

User-agent: Eurobot
Disallow: /

User-agent: Gaisbot
Disallow: /

User-agent: WWW-Mechanize
Disallow: /

# xxqx proof http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
User-agent: MJ12bot
Disallow: /

# http://www.suchen.de/popups/faq.jsp
# Note: "*" is only defined as a stand-alone User-agent value, so the
# "gonzo*" record is kept for lenient parsers only; the plain "gonzo"
# record is the one standards-based parsers will match.
User-agent: gonzo*
Disallow: /

User-agent: gonzo
Disallow: /

User-agent: SapphireWebCrawler
Disallow: /

# http://www.amfibi.com/cabot/
User-agent: Cabot
Disallow: /

# http://spider.acont.de/
User-agent: ACONTBOT
Disallow: /

# 11q4 open http://turnitin.com/robot/crawlerinfo.html (formerly TurnitinBot)
User-agent: turnitinbot
Disallow: /

User-agent: CatchBot
Disallow: /

User-agent: WebRankSpider
Disallow: /

User-agent: yacy
Disallow: /

User-agent: yacybot
Disallow: /

User-agent: Mail.Ru
Disallow: /

# xxqx proof
User-agent: SurveyBot
Disallow: /

User-agent: SurveyBot_IgnoreIP
Disallow: /

User-agent: Yanga WorldSearch Bot
Disallow: /

# http://www.setooz.com/oozbot.html
User-agent: OOZBOT
Disallow: /

# 11q4 open http://www.botje.com/plukkie.htm
User-agent: plukkie
Disallow: /

# NOTE(review): this value is a URL, not a product token; presumably the
# crawler sends this URL in its User-Agent header - confirm before removing.
User-agent: http://www.uni-koblenz.de/~flocke/robot-info.txt
Disallow: /

User-agent: Naver
Disallow: /

User-agent: NaverBot
Disallow: /

User-agent: Yeti
Disallow: /

User-agent: iisbot
Disallow: /

# 11q4 proof http://www.gigablast.com/spider.html
User-agent: Gigabot
Disallow: /

# http://www.mojeek.com/bot.html
User-agent: MojeekBot
Disallow: /

User-agent: citenikbot
Disallow: /

User-agent: Charlotte
Disallow: /

# 11q4 proof http://www.exabot.com/go/robot
User-agent: Exabot
Disallow: /

# 403 http://robot.vedens.de
User-agent: VEDENSBOT
Disallow: /

User-agent: Lexxebot
Disallow: /

User-agent: VoilaBot
Disallow: /

User-agent: Tagoobot
Disallow: /

# http://www.cityreview.org/crawler
User-agent: cityreview
Disallow: /

User-agent: Euripbot
Disallow: /

User-agent: Butterfly
Disallow: /

User-agent: isara-search
Disallow: /

User-agent: Jyxobot
Disallow: /

# xxqx proof http://www.80legs.com/spider.html (maputo02)
User-agent: 008
Disallow: /

# 403 specialists -------------------
User-agent: MLBot
Disallow: /

User-agent: libwww-perl
Disallow: /

User-agent: Nutch
Disallow: /

User-agent: nutch-agent
Disallow: /

User-agent: panscient.com
Disallow: /

User-agent: BotOnParade
Disallow: /

User-agent: jobs.de-Robot
Disallow: /

User-agent: Clewwa-Bot
Disallow: /

# http://www.search17.com/bot.php
User-agent: search17
Disallow: /

User-agent: spbot
Disallow: /

# 403 http://spinn3r.com/robot
User-agent: Spinn3r
Disallow: /

# http://www.entireweb.com/about/search_tech/speedy_spider/ Entireweb Robot
User-agent: Speedy
Disallow: /

User-agent: ccbot
Disallow: /

# http://www.envolk.com/envolkspiderinfo.html
User-agent: envolk
Disallow: /

# http://www.wise-guys.nl/webcrawler.php?item=crawlers
User-agent: Vagabondo
Disallow: /
User-agent: Bilbo
Disallow: /

# http://www.tineye.com/faq
User-agent: TinEye
Disallow: /
User-agent: "TinEye"
Disallow: /

# http://bixolabs.com/crawler/general/
User-agent: bixolabs
Disallow: /

User-agent: Baiduspider
Disallow: /

User-agent: infometrics-bot
Disallow: /

User-agent: ExDomain
Disallow: /

User-agent: Xenu
Disallow: /

User-agent: Peew
Disallow: /

User-agent: magpie-crawler
Disallow: /
User-agent: magpie-crawler/1.1
Disallow: /

# xxqx open http://www.sitebot.org/robot/
User-agent: sitebot
Disallow: /

User-agent: ICCrawler - iCjobs
Disallow: /

User-agent: iCcrawler
User-agent: iCjobs
User-agent: iCjobs/3.2.3
Disallow: /

# 11q4 OPEN http://ahrefs.com/robot/
User-agent: AhrefsBot
Disallow: /

# 11q4 open http://discoveryengine.com/discobot.html
User-agent: discobot
Disallow: /

# xxqx proof http://www.dotnetdotcom.org/
User-agent: dotbot
Disallow: /

# 11q4 OPEN Mozilla/5.0+(compatible;+Ezooms/1.0;+ezooms.bot@gmail.com)
User-agent: Ezooms
Disallow: /

# 11q4 proof http://wortschatz.uni-leipzig.de/findlinks/
User-agent: findlinks
Disallow: /

# 11q4 open http://www.flightdeckreports.com/pages/bot/ (maputo02)
User-agent: FlightDeckReportsBot
Disallow: /

# 11q4 open http://www.openwebspider.org/
User-agent: OpenWebSpider
Disallow: /

# 11q5 open http://fulltext.sblog.cz/
User-agent: SeznamBot
Disallow: /

# xxqx OPEN http://help.soso.com/webspider.htm (or "Sosospider")
User-agent: Sosospider
Disallow: /

# 11q4 open http://webintegration.at/
# User-agent: WI Job Roboter Spider Version 3
# Disallow: /
# first like this, then like that (EMAIL)
User-agent: webintegration
Disallow: /

# 11q4 open http://rvs.informatik.uni-leipzig.de/bot.php
User-agent: webmeasurement-bot
Disallow: /

# 11q4 open http://www.nerdbynature.net/bot lusaka01
User-agent: NerdByNature.Bot
Disallow: /

# 11q4 open Email
User-agent: UnisterBot
Disallow: /

# 12q1 open (suggybot+v0.01a, http://blog.suggy.com/was-ist-suggy/suggy-webcrawler/) luanda
User-agent: suggybot
Disallow: /

# Default rules for every crawler that has no record of its own above.
# This is the only "*" record in the file; add further default rules
# here rather than opening a second "User-agent: *" record.
#   /wp-content/plugins/  - plugin directory
#   /*NOINDEX*            - any URL containing "NOINDEX" (paths must start
#                           with "/", hence the leading "/*")
#   /*.gif$ ... /*.png$   - URLs ending in these image extensions
User-agent: *
Disallow: /wp-content/plugins/
Disallow: /*NOINDEX*
Disallow: /*.gif$
Disallow: /*.jpg$
Disallow: /*.jpeg$
Disallow: /*.png$