# ================================================
# LUXOR Fine Dining Restaurant & Luxury Hotel
# Robots.txt Configuration
# Last Updated: January 15, 2024
# ================================================

# ================================================
# SECTION 1: GENERAL RULES FOR ALL BOTS
# ================================================
User-agent: *
Allow: /
# Path patterns must begin with "/" — "/*.css$" is the canonical wildcard form
Allow: /*.css$
Allow: /*.js$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.svg$
Allow: /*.webp$

# Disallow admin and private directories
Disallow: /admin/
Disallow: /private/
Disallow: /backup/
Disallow: /temp/
Disallow: /cache/
Disallow: /logs/
Disallow: /config/
Disallow: /uploads/
Disallow: /staging/
Disallow: /development/
Disallow: /test/
Disallow: /tmp/

# Disallow specific file types that shouldn't be indexed
Disallow: /*.pdf$
Disallow: /*.doc$
Disallow: /*.docx$
Disallow: /*.xls$
Disallow: /*.xlsx$
Disallow: /*.zip$
Disallow: /*.rar$
Disallow: /*.exe$

# Disallow search result pages
Disallow: /*?*search*
Disallow: /*?*filter*
Disallow: /*?*sort*
Disallow: /search/
Disallow: /results/

# Disallow query strings with tracking parameters
Disallow: /*?utm_*
Disallow: /*?fbclid=*
Disallow: /*?gclid=*

# Disallow print versions
Disallow: /print/
Disallow: /*?print=*

# Disallow dynamic pages
Disallow: /cgi-bin/
Disallow: /includes/
Disallow: /lib/
Disallow: /vendor/

# Crawl delay (wait time between requests in seconds)
# NOTE(review): Crawl-delay is a non-standard extension — honored by Bing and
# Yandex, ignored by Googlebot.
Crawl-delay: 1

# Request rate (requests per second)
# NOTE(review): "Request-rate" is not part of RFC 9309 and is ignored by all
# major crawlers; kept only for legacy bots that may still support it.
Request-rate: 1/1s

# ================================================
# SECTION 2: GOOGLE-SPECIFIC RULES
# ================================================
User-agent: Googlebot
Allow: /
# WARNING(review): robots.txt is publicly readable — listing a "private" admin
# URL here advertises its location to anyone. Prefer authentication or a
# noindex mechanism instead; confirm this exposure is intentional.
Allow: /admin/private-pages.html
# NOTE(review): Googlebot ignores Crawl-delay entirely (crawl rate for Google
# is managed via Search Console), and fractional values are non-standard.
Crawl-delay: 0.5

# Google Image Bot
User-agent: Googlebot-Image
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.png$
Allow: /*.gif$
Allow: /*.webp$

# Google Video Bot
User-agent: Googlebot-Video
Allow: /*.mp4$
Allow: /*.webm$
Allow: /*.avi$

# ================================================
# SECTION 3: BING-SPECIFIC RULES
#
# ================================================
User-agent: Bingbot
Allow: /
Crawl-delay: 1

# ================================================
# SECTION 4: BLOCK BAD/AGGRESSIVE BOTS
# ================================================
# AhrefsBot - SEO crawler
User-agent: AhrefsBot
User-agent: AhrefsBot-Site
User-agent: AhrefsBot/Nutch
Disallow: /

# SemrushBot - SEO crawler
User-agent: SemrushBot
User-agent: SemrushBot-SA
Disallow: /

# MJ12bot - Majestic bot
User-agent: MJ12bot
Disallow: /

# BLEXBot - Aggressive crawler
User-agent: BLEXBot
Disallow: /

# DotBot - Moz crawler
User-agent: dotbot
Disallow: /

# Grapeshot - Content monitoring
User-agent: GrapeshotCrawler
Disallow: /

# MetaUri Scanner
User-agent: MetaUri
Disallow: /

# Xenu Link Sleuth
# NOTE(review): user-agent product tokens should not contain spaces; the
# matching token for Xenu Link Sleuth is "Xenu".
User-agent: Xenu
Disallow: /

# Linkdex crawler
User-agent: linkdexbot
Disallow: /

# Sistrix crawler
User-agent: SISTRIX
Disallow: /

# Ezooms crawler
User-agent: ezooms
Disallow: /

# Speedy Spider
# NOTE(review): space-containing token — verify how target crawlers match it.
User-agent: Speedy Spider
Disallow: /

# Turbine
User-agent: Turbine
Disallow: /

# ================================================
# SECTION 5: SOCIAL MEDIA BOTS (Allow)
# ================================================
# Facebook
User-agent: facebookexternalhit
Allow: /

# Twitter
User-agent: Twitterbot
Allow: /

# LinkedIn
User-agent: LinkedInBot
Allow: /

# Instagram
User-agent: Instagram
Allow: /

# WhatsApp
User-agent: WhatsApp
Allow: /

# Telegram
User-agent: TelegramBot
Allow: /

# Pinterest
User-agent: PinterestBot
Allow: /

# ================================================
# SECTION 6: SEARCH ENGINE CRAWLERS (Allow)
# ================================================
# Yahoo
# NOTE(review): Yahoo's crawler product token is "Slurp", not "Yahoo!".
User-agent: Slurp
User-agent: Slurp
Allow: /

# DuckDuckGo
User-agent: DuckDuckBot
Allow: /

# Yandex
User-agent: YandexBot
Allow: /

# Baidu
User-agent: Baiduspider
Allow: /

# Sogou
User-agent: Sogou web spider
Allow: /

# Qihoo
User-agent: Qihoobot
Allow: /

# ================================================
# SECTION 7: ANALYTICS BOTS (Allow)
# ================================================
User-agent: AdsBot-Google
Allow: /

User-agent: AdsBot-Google-Mobile
Allow: /

User-agent: Mediapartners-Google
Allow: /

User-agent: Feedfetcher-Google
Allow: /

# ================================================
# SECTION 8: SITEMAPS & FEEDS
# ================================================
# Primary Sitemap
Sitemap: https://luxor-restaurant.com/sitemap.xml

# Additional Sitemaps (if you have multiple)
Sitemap: https://luxor-restaurant.com/sitemap-news.xml
Sitemap: https://luxor-restaurant.com/sitemap-images.xml
Sitemap: https://luxor-restaurant.com/sitemap-videos.xml
Sitemap: https://luxor-restaurant.com/sitemap-mobile.xml

# RSS Feeds (Google accepts RSS 2.0 / Atom 1.0 feeds as sitemap formats)
Sitemap: https://luxor-restaurant.com/feed.xml
Sitemap: https://luxor-restaurant.com/blog-feed.xml

# ================================================
# SECTION 9: ALLOW CRAWLING OF CDN & RESOURCES
# ================================================
# NOTE(review): this is a SECOND "User-agent: *" group. RFC 9309 says
# duplicate groups are merged, but some older parsers honor only the first
# group they find — consider folding these rules into Section 1.
# Allow CDN resources
User-agent: *
Allow: /*.css$
Allow: /*.js$
Allow: /*.woff$
Allow: /*.woff2$
Allow: /*.ttf$
Allow: /*.eot$
Allow: /*.svg$

# Allow compressed files
Allow: /*.gzip$

# ================================================
# SECTION 10: COMMENTS & DOCUMENTATION
# ================================================
# This robots.txt file is designed to:
# 1. Allow legitimate search engine bots to crawl content
# 2. Block aggressive/bad bots that abuse resources
# 3. Protect sensitive directories and files
# 4. Optimize crawl budget for important pages
# 5. Allow social media and analytics bots
#
# A crawler obeys the single group whose User-agent line matches it most
# specifically. Within that group, the most specific (longest) matching
# path rule wins, regardless of the order in which rules appear
# (per RFC 9309 and Google's documented behavior).
#
# For more information about robots.txt:
# https://www.robotstxt.org/
# https://developers.google.com/search/docs/advanced/robots/intro

# ================================================
# SECTION 11: FREQUENTLY USED PATTERNS
# ================================================
# Pattern: Disallow all files of a type
# Disallow: /*.pdf$

# Pattern: Disallow all parameters in URL
# Disallow: /*?*

# Pattern: Allow only specific files
# Allow: /*.pdf$
# Disallow: /*.pdf

# Pattern: Case sensitivity
# User-agent values are matched case-insensitively, but path rules
# (Allow/Disallow) are case-SENSITIVE per RFC 9309 — /Menu.pdf and
# /menu.pdf are different paths.

# ================================================
# END OF ROBOTS.TXT
# ================================================