robots.txt details

saved: size: 3415 kb md5 checksum: cb22b5891fd67b5a691fcdf089bb0ff7

websites using this robots.txt

content


#
# robots.txt
#
# This file is to prevent the crawling and indexing of certain parts
# of your site by web crawlers and spiders run by sites like Yahoo!
# and Google. By telling these "robots" where not to go on your site,
# you save bandwidth and server resources.
#
# This file will be ignored unless it is at the root of your host:
# Used:    http://example.com/robots.txt
# Ignored: http://example.com/site/robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/robotstxt.html
# https://www.rfc-editor.org/rfc/rfc9309 (Robots Exclusion Protocol)
#
# For syntax checking, see:
# http://www.sxw.org.uk/computing/robots/check.html


# Sitemaps
# Sitemap records do not belong to any user-agent group, so they are listed
# ahead of the group instead of between its User-agent lines and its rules.
Sitemap: https://www.lavoixdunord.fr/sites/default/files/sitemaps/www_lavoixdunord_fr/sitemapnews-0.xml
Sitemap: https://www.lavoixdunord.fr/sites/default/files/sitemaps/www_lavoixdunord_fr/sitemapindex.xml

# Agent Specific Allowed Sections
# The User-agent lines and the Allow/Disallow rules that follow form ONE
# group. Keep the group free of blank lines: parsers that follow the original
# robots.txt convention treat a blank line as the end of a record, which
# detaches the rules from the crawlers named here and leaves those crawlers
# subject to the catch-all "User-agent: *" group instead.
# NOTE(review): "/simplsamlphp_auth/" below looks like a typo for
# "/simplesamlphp_auth/" - confirm against the site's real paths before
# changing it.
User-agent: Mediapartners-Google
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Mobile
User-agent: Googlebot-News
User-agent: Googlebot-Video
User-agent: Adsbot-Google
User-Agent: Googlebot_Nauxeo
User-agent: msnbot
User-agent: Slurp
User-agent: ia_archiver
User-agent: bingbot
User-agent: archive.org_bot
User-agent: Twitterbot
User-agent: Applebot
User-agent: Bingbot
User-agent: EchoboxBot
User-agent: Facebot
User-agent: SemrushBot
User-agent: Screaming Frog SEO Spider
User-agent: grapeshot
User-agent: ias_crawler
User-agent: Publication-Access-for-Facebook
User-agent: proximic
User-agent: AdsBot-Google-Mobile
User-agent: AdsBot-Google-Mobile-Apps
User-agent: FeedFetcher-Google
User-agent: Google-Read-Aloud
User-agent: GoogleAssociationService
User-agent: Yahoo! Slurp
User-agent: Qwantify
User-agent: DuckDuckBot
User-agent: TelegramBot
User-agent: LinkedInBot
User-agent: facebookexternalhit
User-agent: Pinterest
User-agent: Cliqzbot
User-agent: 360Spider
User-agent: Flipboard
Allow: /.well-known/
Allow: /apple-app-site-association
Allow: /dpi247CMS/modules/dpi/dpistat/updater.php
Allow: /misc/*.js
Allow: /misc/*.css
Allow: /modules/*.js
Allow: /modules/*.css
Allow: /profiles/*.js
Allow: /profiles/*.css
Allow: /themes/*.js
Allow: /themes/*.css
Disallow: /scripts/
Disallow: /themes/
Disallow: /simplesaml
Disallow: /simplsamlphp_auth/
Disallow: /wallyextra/contenttypesajax
Disallow: /esi/
Disallow: /includes/
Disallow: /profiles/
Disallow: /misc/
Disallow: /modules/
Disallow: /journal/VDN/
Disallow: /CHANGELOG.txt
Disallow: /cron.php
Disallow: /INSTALL.mysql.txt
Disallow: /INSTALL.pgsql.txt
Disallow: /install.php
Disallow: /INSTALL.txt
Disallow: /LICENSE.txt
Disallow: /MAINTAINERS.txt
Disallow: /update.php
Disallow: /UPGRADE.txt
Disallow: /xmlrpc.php
Disallow: /admin
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /logout
Disallow: /search/
Disallow: /user/register
Disallow: /user/password
Disallow: /user/login
Disallow: /cgi-bin/
Disallow: /bears
Disallow: /feed/19?format=choixredac
Disallow: /region/weo
Disallow: /sites/all/modules/custom/vdn/vdnfreemium/scripts/
Disallow: /economie/journal-eco
Disallow: /region/test-publi
Disallow: /archives/recherche
Disallow: /*/*/*/types
Disallow: /*/*/*/modules
Disallow: /*/*/*/pixelDisplay
Disallow: /*/*/*/productDisplay
Disallow: /*/*/*/util
Disallow: /*/*/*/sets
Disallow: /*/*/*/lavdn.lavoixdunord.fr
Disallow: /*/*/*/positions
Disallow: /archive/video
Disallow: /resultats-examens

# Catch-all group: every crawler not named in an earlier group is denied
# access to the whole site. The crawl delay lives in this same group because
# some parsers honour only the first "User-agent: *" group they encounter and
# would never see a delay declared in a second one.
User-agent: *
Disallow: /
Crawl-delay: 10