# robots.txt
#
# Policy: only Facebook's crawlers (link previews / catalog) may fetch pages;
# every other crawler is asked to stay out of the whole site.
#
# Format reminder: robots.txt is line-oriented. Each directive must be on its
# own line, and a '#' starts a comment that runs to the end of that line, so
# this file must never be minified onto a single line.

# ---------------------------------------------------------------------------
# Facebook crawlers: allowed everywhere
# ---------------------------------------------------------------------------
User-agent: facebookexternalhit
Allow: /

# Same product token in mixed case. Compliant parsers match user-agents
# case-insensitively, so this is redundant for them; it is kept for parsers
# that compare the token literally.
User-agent: FacebookExternalHit
Allow: /

User-agent: facebookcatalog
Allow: /

User-agent: facebot
Allow: /

# ---------------------------------------------------------------------------
# Default for every crawler not named in this file: blocked
# ---------------------------------------------------------------------------
User-agent: *
Disallow: /
# Non-standard directive; only matters to crawlers that ignore the Disallow
# above but still honor a crawl delay.
Crawl-delay: 10

# ---------------------------------------------------------------------------
# Search engines: blocked explicitly (already covered by '*', listed so the
# intent is unambiguous)
# ---------------------------------------------------------------------------
User-agent: Googlebot
Disallow: /

User-agent: Bingbot
Disallow: /

User-agent: Slurp
Disallow: /

User-agent: DuckDuckBot
Disallow: /

User-agent: Baiduspider
Disallow: /

User-agent: YandexBot
Disallow: /

User-agent: Sogou
Disallow: /

User-agent: ia_archiver
Disallow: /

User-agent: Applebot
Disallow: /

# ---------------------------------------------------------------------------
# Other social link-preview bots: blocked
# ---------------------------------------------------------------------------
User-agent: Twitterbot
Disallow: /

User-agent: LinkedInBot
Disallow: /

# Explicitly block archive.org
User-agent: archive.org_bot
Disallow: /

# ---------------------------------------------------------------------------
# Automated download tools: blocked
# (advisory only; these tools obey robots.txt only when configured to)
# ---------------------------------------------------------------------------
User-agent: HTTrack
Disallow: /

User-agent: wget
Disallow: /

User-agent: curl
Disallow: /

# Signal that this site does not want to be indexed or scraped.
# Note: 'Noindex' is NOT a standard robots.txt directive (it is not part of
# RFC 9309) and Google stopped honoring it in robots.txt in September 2019.
# It is kept only as a best-effort hint for crawlers that still read it; use
# an 'X-Robots-Tag: noindex' response header or a robots meta tag for a
# reliable noindex.
Noindex: /