The Sitemap directive in robots.txt tells crawlers where to find your XML sitemap. Common bugs: relative URL instead of absolute, missing protocol, pointing at an HTML sitemap page instead of XML, or no declaration at all. Search engines can sometimes find sitemaps without help, but declaring them in robots.txt AND submitting via Search Console is best practice. This guide covers the correct format, multi-sitemap patterns, and submission workflow.
curl -I https://example.com/sitemap.xml # Expected: # HTTP/2 200 # Content-Type: application/xml or text/xml
curl -s https://example.com/sitemap.xml | xmllint --noout - # No output = valid XML # Errors listed if malformed
# Correct: absolute URL with protocol
User-agent: *
Disallow: /admin/
Sitemap: https://example.com/sitemap.xml
# One Sitemap line per file
Sitemap: https://example.com/sitemap-pages.xml
Sitemap: https://example.com/sitemap-posts.xml
Sitemap: https://example.com/sitemap-products.xml
# Better: single sitemap index
Sitemap: https://example.com/sitemap-index.xml
# The index file then references all child sitemaps
# BAD: silently invalid
Sitemap: /sitemap.xml
# RIGHT: absolute URL
Sitemap: https://example.com/sitemap.xml
# BAD
Sitemap: example.com/sitemap.xml
# RIGHT
Sitemap: https://example.com/sitemap.xml
# Must match the actual canonical host
# Site uses https://www.example.com canonical:
# BAD
Sitemap: http://example.com/sitemap.xml
# RIGHT
Sitemap: https://www.example.com/sitemap.xml
# BAD: HTML sitemap page, not XML
Sitemap: https://example.com/sitemap.html
# RIGHT: machine-readable XML
Sitemap: https://example.com/sitemap.xml
# BAD: sitemap.xml is itself blocked
User-agent: *
Disallow: /sitemap
Sitemap: https://example.com/sitemap.xml
# Some crawlers may refuse to fetch
# RIGHT: ensure sitemap path is allowed (default is allowed)
For sites with multiple sitemaps, use an index file:
<?xml version="1.0" encoding="UTF-8"?>
<!-- /sitemap.xml (the index) -->
<!-- Note: the XML declaration above must be the very first thing in the file;
     comments may only appear after it. -->
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <!-- One <sitemap> entry per child sitemap file -->
  <sitemap>
    <loc>https://example.com/sitemap-pages.xml</loc>
    <lastmod>2024-01-15</lastmod>
  </sitemap>
  <sitemap>
    <loc>https://example.com/sitemap-posts-2024.xml</loc>
    <lastmod>2024-01-20</lastmod>
  </sitemap>
  <sitemap>
    <loc>https://example.com/sitemap-products.xml</loc>
    <lastmod>2024-01-20</lastmod>
  </sitemap>
</sitemapindex>
# robots.txt only references the index
Sitemap: https://example.com/sitemap.xml
For sites with frequent updates, IndexNow lets you notify search engines instantly when URLs change. Bing, Yandex, and Seznam support it; Google does not (Google has its own Indexing API for select use cases).
# POST to IndexNow endpoint
POST https://api.indexnow.org/indexnow
Content-Type: application/json
{
"host": "example.com",
"key": "your-key-here",
"urlList": [
"https://example.com/new-page",
"https://example.com/updated-page"
]
}
# Place key verification file at:
# https://example.com/your-key-here.txt
# containing the key value as plain text
curl -s https://example.com/robots.txt | grep -i Sitemap # Expected output: # Sitemap: https://example.com/sitemap.xml
# Yoast generates: /sitemap_index.xml
# Rank Math generates: /sitemap_index.xml
# Add to robots.txt manually if plugin doesn't:
Sitemap: https://example.com/sitemap_index.xml
// app/robots.ts (App Router)
/**
 * Builds the robots configuration object.
 *
 * @returns {{ rules: { userAgent: string, allow: string }, sitemap: string }}
 *   A single rule allowing every user agent to crawl from the root, plus the
 *   absolute (protocol + host) URL of the XML sitemap.
 */
export default function robots() {
  // Applies to all crawlers and permits the whole site.
  const rules = { userAgent: '*', allow: '/' };
  // Absolute URL, including protocol and host.
  const sitemap = 'https://example.com/sitemap.xml';

  return { rules, sitemap };
}
// astro.config.mjs
// Enables the @astrojs/sitemap integration. `site` must be set to the
// deployed absolute origin so generated sitemap URLs are absolute.
import { defineConfig } from 'astro/config'; // was missing: defineConfig is used below
import sitemap from '@astrojs/sitemap';

export default defineConfig({
  site: 'https://example.com',
  integrations: [sitemap()],
});
// Generates /sitemap-index.xml and /sitemap-0.xml