# robots.txt for ageinghacker.net
#
# Layout: one Sitemap line, one default group ("User-agent: *") carrying the
# site-wide exclusions, then one group per crawler that is banned outright.
# Inside a group, sections are separated with "#"-only lines rather than
# blank lines: the original exclusion standard treats a blank line as the end
# of a record, and some line-oriented parsers still do.

Sitemap: https://ageinghacker.net/sitemap.xml.gz
# Apparently I could add my RSS and Atom feeds as "Sitemap:" here, but my
# feeds contain a subset of what is already in sitemap.xml

User-agent: *
#
# This should be enough to avoid indexing mailing list archives.
Disallow: /cgi-bin/
#
# Literary works, off-topic for me. These come from programming projects for
# my classes.
Disallow: /teaching/old/old-lipn/DLL-2011/rattrapage/corpora/
Disallow: /teaching/old/intro-prog-2015/words/
Disallow: /teaching/old/intro-prog-2015/words/data/
Disallow: /teaching/programming-python/corpora/
# Disallow: /teaching/programming-python/corpora/ancient-greek--phaenomena--utf-8
# Disallow: /teaching/programming-python/corpora/arabic--one-thousand-and-one-nights--utf-8
# Disallow: /teaching/programming-python/corpora/bulgarian--something--utf-8
# Disallow: /teaching/programming-python/corpora/catalan--something--utf-8
# Disallow: /teaching/programming-python/corpora/chinese--the-advocate--lai-ho--utf-8
# Disallow: /teaching/programming-python/corpora/czech--something--utf-8
# Disallow: /teaching/programming-python/corpora/danish--something--utf-8
# Disallow: /teaching/programming-python/corpora/dutch--something--utf-8
# Disallow: /teaching/programming-python/corpora/english--narrative-of-a-gordon-pym--poe--utf-8
# Disallow: /teaching/programming-python/corpora/esperanto--something--utf-8
# Disallow: /teaching/programming-python/corpora/farsi--something--utf-8
# Disallow: /teaching/programming-python/corpora/finnish--something--utf-8
# Disallow: /teaching/programming-python/corpora/french--la-recherche-de-labsolu--balzac--utf-8
# Disallow: /teaching/programming-python/corpora/german--der-prozess--kafka--utf-8
# Disallow: /teaching/programming-python/corpora/hebrew--something--utf-8
# Disallow: /teaching/programming-python/corpora/hungarian--something--utf-8
# Disallow: /teaching/programming-python/corpora/icelandic--something--utf-8
# Disallow: /teaching/programming-python/corpora/italian--le-avventure-di-pinocchio--collodi--utf-8
# Disallow: /teaching/programming-python/corpora/japanese--kairo-ko--soseki--utf-8
# Disallow: /teaching/programming-python/corpora/korean--burning-mountain--cha-pomsok--utf-8
# Disallow: /teaching/programming-python/corpora/latin--something--cicero--utf-8
# Disallow: /teaching/programming-python/corpora/modern-greek--stuff--utf-8
# Disallow: /teaching/programming-python/corpora/norwegian--something--utf-8
# Disallow: /teaching/programming-python/corpora/polish--something--utf-8
# Disallow: /teaching/programming-python/corpora/portuguese--something--utf-8
# Disallow: /teaching/programming-python/corpora/russian--childhood--tolstoy--utf-8
# Disallow: /teaching/programming-python/corpora/spanish--el-ingenioso-hidalgo-don-quijote-de-la-mancha--cervantes--utf-8
# Disallow: /teaching/programming-python/corpora/swedish--something--utf-8
# Disallow: /teaching/programming-python/corpora/tagalog--something--utf-8
# Disallow: /teaching/programming-python/corpora/turkish--something--utf-8
# Disallow: /teaching/programming-python/corpora/urdu--something--utf-8
# Disallow: /teaching/programming-python/corpora/corpora-version-1.tar.gz
# Disallow: /teaching/programming-python/corpora/langues.py
#
# These URLs are pre-rewriting. Do I need this?
# Disallow: /lipn-stuff/teaching/DLL-2011/rattrapage/corpora
# Disallow: /lipn-stuff/teaching/DLL-2011/rattrapage/corpora/
#
# # Do not scan tag indices: they contain redundant information, if the rest
# # of the site has been crawled.
# ##########################################################################
# # Tentative: wildcards are commonly supported by crawlers, but the
# # specification does not define them.
# # NOTE(review): RFC 9309 has since defined "*" and "$" in rule paths, so
# # the caveat above may no longer apply.
# ##########################################################################
# Disallow: /blog/tags
# Disallow: /blog/tags/
# Disallow: /blog/tags/*/*
#
## Old cgit pages
##########################################################################
#
# For humans but not interesting to index. Now they have become
# redirections, but that does not matter.
# NOTE(review): rules are path-prefix matches, so "/git" alone already covers
# "/git/" and every other path starting with "/git" (including "/gitstats",
# which is left commented out below) -- confirm that this is intended.
Disallow: /git
Disallow: /git/
# Disallow: /git/*
# Disallow: /gitstats
# Disallow: /gitstats/
# Disallow: /gitstats/*

## Particularly wasteful bots
##########################################################################

# NOTE(review): this value contains spaces, which are not valid in an
# RFC 9309 product token, so some parsers may not match it as written --
# confirm the token this crawler actually looks for.
User-agent: The Knowledge AI
Disallow: /

# Crawlers are matched by product token, not by their full HTTP User-Agent
# header, so the bare "MJ12bot" token is the line that takes effect. The
# full header string that used to have its own group is kept here only for
# reference:
# User-agent: Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)
User-agent: MJ12bot
Disallow: /

## Bots I just dislike
##########################################################################

# The “Ahrefs online marketing toolset” is something I never want to have
# anything to do with.
# The token is written without a version suffix (it used to be
# "AhrefsBot/7.0") so that the rule keeps matching across crawler releases.
User-agent: AhrefsBot
Disallow: /