# Robots.txt for PDFtoPicture.cc
# https://pdftopicture.cc/robots.txt
# Last updated: 2024-12-19

# Sitemap location (independent of the user-agent groups below)
Sitemap: https://pdftopicture.cc/sitemap.xml

# Default group: applies to every crawler that has no group of its own in
# this file, which includes Googlebot, Bingbot, Slurp, DuckDuckBot and
# YandexBot. Everything is crawlable except the development and system files
# listed here. No "Allow: /" is needed: a path that matches no Disallow rule
# is allowed by default, so /images/, /favicon.ico, /favicon.svg, /ads.txt,
# /sitemap.xml and /robots.txt all remain crawlable. Leaving "Allow: /" out
# also keeps parsers that stop at the first matching rule from skipping the
# Disallow lines.
# Crawl-delay is a non-standard extension; crawlers that do not support it
# (Googlebot among them) ignore the line, so no Googlebot-specific value is set.
# NOTE: robots.txt is advisory and publicly readable. These paths must also be
# blocked (or simply not deployed) at the web-server level.
User-agent: *
Disallow: /.git/
Disallow: /node_modules/
Disallow: /src/
Disallow: /.env
Disallow: /package.json
Disallow: /package-lock.json
Disallow: /yarn.lock
Disallow: /pnpm-lock.yaml
Disallow: /tsconfig.json
Disallow: /vite.config.ts
Disallow: /tailwind.config.js
Disallow: /postcss.config.js
Disallow: /eslint.config.js
Disallow: /.vercel/
Disallow: /.trae/
Crawl-delay: 1

# Baiduspider is asked to crawl more slowly. A crawler obeys only the group
# that matches it most specifically, so the Disallow list is repeated here
# instead of being inherited from the "*" group.
User-agent: Baiduspider
Disallow: /.git/
Disallow: /node_modules/
Disallow: /src/
Disallow: /.env
Disallow: /package.json
Disallow: /package-lock.json
Disallow: /yarn.lock
Disallow: /pnpm-lock.yaml
Disallow: /tsconfig.json
Disallow: /vite.config.ts
Disallow: /tailwind.config.js
Disallow: /postcss.config.js
Disallow: /eslint.config.js
Disallow: /.vercel/
Disallow: /.trae/
Crawl-delay: 2

# SEO/backlink crawlers that are denied the whole site to save resources.
# One group with several User-agent lines applies the same rule to each bot.
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: SemrushBot
Disallow: /

# Host directive naming the preferred (canonical) domain for this site.
# NOTE(review): "Host" is not part of the Robots Exclusion Protocol (RFC 9309).
# It is believed to be a Yandex-only extension that Yandex has since retired,
# and other crawlers ignore it. Confirm, and consider removing this line and
# relying on redirects or rel="canonical" instead.
Host: https://pdftopicture.cc