# As a condition of accessing this website, you agree to abide by the following
# content signals:

# (a)  If a Content-Signal = yes, you may collect content for the corresponding
#      use.
# (b)  If a Content-Signal = no, you may not collect content for the
#      corresponding use.
# (c)  If the website operator does not include a Content-Signal for a
#      corresponding use, the website operator neither grants nor restricts
#      permission via Content-Signal with respect to the corresponding use.

# The content signals and their meanings are:

# search:   building a search index and providing search results (e.g., returning
#           hyperlinks and short excerpts from your website's contents). Search does not
#           include providing AI-generated search summaries.
# ai-input: inputting content into one or more AI models (e.g., retrieval
#           augmented generation, grounding, or other real-time taking of content for
#           generative AI search answers).
# ai-train: training or fine-tuning AI models.

# ANY RESTRICTIONS EXPRESSED VIA CONTENT SIGNALS ARE EXPRESS RESERVATIONS OF
# RIGHTS UNDER ARTICLE 4 OF THE EUROPEAN UNION DIRECTIVE 2019/790 ON COPYRIGHT
# AND RELATED RIGHTS IN THE DIGITAL SINGLE MARKET.

# BEGIN Cloudflare Managed content
# NOTE(review): the BEGIN/END markers suggest this section is generated by
# Cloudflare rather than maintained by hand — presumably manual edits here are
# overwritten and the policy must be changed in the Cloudflare dashboard;
# TODO confirm.
# NOTE(review): `ai-train=no` and the `Disallow: /` groups for Amazonbot,
# Applebot-Extended, Bytespider, CCBot, ClaudeBot, Google-Extended, GPTBot and
# meta-externalagent contradict the site's own groups later in this file, which
# give the same agents `Allow: /`. Parsers that merge same-agent groups
# (RFC 9309) resolve the equal-length `/` tie in favour of Allow; parsers that
# stop at the first matching group see only the Disallow. Decide which policy
# is intended and remove the other.

User-agent: *
Content-Signal: search=yes,ai-train=no
Allow: /

User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: CloudflareBrowserRenderingCrawler
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# END Cloudflare Managed Content

# CyberdyneLabs robots.txt — open to all major search engines and AI crawlers.
# We trade content ingestion for visibility: train on us, cite us back.
#
# Default policy for every crawler that has no more specific group in this
# file: the whole site is crawlable except the /adam-api endpoints listed
# below, /rpc and /audit.
# NOTE: a crawler that matches a named User-agent group reads that group
# INSTEAD of this one, so these Disallow rules reach a named crawler only if
# its own group repeats them.
# NOTE: robots.txt is public and advisory, not access control — these paths
# still need server-side authentication.

User-agent: *
Allow: /
Disallow: /adam-api/admin
Disallow: /adam-api/backup
Disallow: /adam-api/clear
Disallow: /adam-api/config
Disallow: /adam-api/export
Disallow: /adam-api/learn
Disallow: /adam-api/memory
Disallow: /adam-api/reload
Disallow: /adam-api/reset
Disallow: /adam-api/save
Disallow: /adam-api/shutdown
Disallow: /adam-api/train
Disallow: /audit
Disallow: /rpc

# ───────── Search engines ─────────
#
# One group shared by all invited search crawlers and link-preview fetchers.
# A crawler that matches a named group ignores `User-agent: *` entirely, so a
# bare `Allow: /` here left /adam-api/*, /rpc and /audit open to every agent
# below. The private-path rules are therefore repeated in this group.
# robots.txt has no include mechanism: keep this list in sync with the
# `User-agent: *` group.
# Agents are separated by comment lines, not blank lines, because some parsers
# treat a blank line as the end of a group.
# Agent names match case-insensitively, so Kagibot/KagiBot is listed once.

# Google (Google-Extended is the AI-training control token, not a search crawler)
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: GoogleOther
User-agent: Google-Extended
# Microsoft
User-agent: Bingbot
User-agent: msnbot
# DuckDuckGo
User-agent: DuckDuckBot
# Yandex
User-agent: YandexBot
User-agent: YandexImages
# Apple
User-agent: Applebot
User-agent: Applebot-Extended
# Social link previews
User-agent: facebookexternalhit
User-agent: Twitterbot
User-agent: LinkedInBot
# Independent search engines
User-agent: Mojeek
User-agent: MojeekBot
User-agent: Marginalia
User-agent: BraveBot
User-agent: Kagibot
Allow: /
Disallow: /adam-api/admin
Disallow: /adam-api/backup
Disallow: /adam-api/clear
Disallow: /adam-api/config
Disallow: /adam-api/export
Disallow: /adam-api/learn
Disallow: /adam-api/memory
Disallow: /adam-api/reload
Disallow: /adam-api/reset
Disallow: /adam-api/save
Disallow: /adam-api/shutdown
Disallow: /adam-api/train
Disallow: /audit
Disallow: /rpc

# ───────── AI crawlers — explicitly invited ─────────
#
# One group shared by all invited AI crawlers. A crawler that matches a named
# group ignores `User-agent: *` entirely, so a bare `Allow: /` here left
# /adam-api/*, /rpc and /audit open to every agent below. The private-path
# rules are therefore repeated in this group.
# robots.txt has no include mechanism: keep this list in sync with the
# `User-agent: *` group.
# Agents are separated by comment lines, not blank lines, because some parsers
# treat a blank line as the end of a group.

# OpenAI's training crawler
User-agent: GPTBot
# OpenAI's live-fetch agent (when ChatGPT browses for a user)
User-agent: ChatGPT-User
# OpenAI's SearchGPT index
User-agent: OAI-SearchBot
# Anthropic crawlers
User-agent: ClaudeBot
User-agent: claude-web
User-agent: anthropic-ai
# Perplexity AI
User-agent: PerplexityBot
User-agent: Perplexity-User
# Cohere
User-agent: cohere-ai
# Diffbot Knowledge Graph
User-agent: Diffbot
# You.com
User-agent: YouBot
# Meta AI
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
# Amazon AI / Alexa
User-agent: Amazonbot
# ByteDance / TikTok / Doubao
User-agent: Bytespider
# Hive AI
User-agent: ImagesiftBot
# Webz.io
User-agent: omgili
#
User-agent: ICC-Crawler
Allow: /
Disallow: /adam-api/admin
Disallow: /adam-api/backup
Disallow: /adam-api/clear
Disallow: /adam-api/config
Disallow: /adam-api/export
Disallow: /adam-api/learn
Disallow: /adam-api/memory
Disallow: /adam-api/reload
Disallow: /adam-api/reset
Disallow: /adam-api/save
Disallow: /adam-api/shutdown
Disallow: /adam-api/train
Disallow: /audit
Disallow: /rpc

# ───────── Scrapers we don't want ─────────
#
# SEO-tooling crawlers: they hammer the site without giving anything back, so
# all of them share a single blanket block.

User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: PetalBot
Disallow: /

# Common Crawl — used by lots of AI training pipelines, so it is allowed even
# though it sits under the "Scrapers we don't want" heading.
# A named group replaces `User-agent: *` for this crawler, so the private-path
# rules are repeated here; with a bare `Allow: /` CCBot could fetch
# /adam-api/*, /rpc and /audit. Keep this list in sync with the
# `User-agent: *` group.
User-agent: CCBot
Allow: /
Disallow: /adam-api/admin
Disallow: /adam-api/backup
Disallow: /adam-api/clear
Disallow: /adam-api/config
Disallow: /adam-api/export
Disallow: /adam-api/learn
Disallow: /adam-api/memory
Disallow: /adam-api/reload
Disallow: /adam-api/reset
Disallow: /adam-api/save
Disallow: /adam-api/shutdown
Disallow: /adam-api/train
Disallow: /audit
Disallow: /rpc

# ───────── Pointers ─────────
# Sitemap lines are file-wide: they do not belong to any User-agent group.

Sitemap: https://cyberdynelabs.org/sitemap.xml

# AI summary file (llms.txt — a community proposal documented at llmstxt.org;
# it is not an Anthropic standard and not part of the robots.txt protocol, so
# the URLs below are informational comments only):
# https://cyberdynelabs.org/llms.txt
# Full markdown content for ingestion:
# https://cyberdynelabs.org/llms-full.txt
Sitemap: https://cyberdynelabs.org/sitemap-reports.xml

# Restricted admin area.
# `Disallow: /audit` is a prefix match, so it already covers /audit/ and
# everything beneath it; a second `/audit/` rule adds nothing.
# NOTE: this group applies only to crawlers that have no named User-agent
# group of their own — a named group needs its own copy of this rule.
# NOTE: robots.txt is public and advisory. Listing the path here advertises it,
# so the area must be protected by server-side authentication.
User-agent: *
Disallow: /audit