sitemap.c 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. /*
  2. * Copyright (c) 2018 Markus Hennecke <markus-hennecke@markus-hennecke.de>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <sys/types.h>
  17. #include <err.h>
  18. #include <fcntl.h>
  19. #include <limits.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <time.h>
  24. #include <unistd.h>
  25. #include "filehelper.h"
  26. #include "buffer.h"
  27. #include "sitemap.h"
  /*
   * Prototypes for the helpers that are private to this file.  Parameter
   * names are spelled out so the declarations document themselves.
   */
  static char *concat_path(const char *_path1, const char *_path2);
  static struct lang_entry *read_language_dir(const char *_content_dir,
      const char *_lang, const char *_hostname);
  static struct url_entry *read_pages_dir(const char *_content_dir,
      const char *_lang, const char *_page, const char *_hostname);
  static char *format_time(time_t _datetime);
  static char *format_url(const char *_hostname, const char *_lang,
      const char *_page, bool ssl);
  static struct buffer_list *create_xml_buffers(struct sitemap *s);
  37. struct url_entry *
  38. url_entry_new(char *_url, time_t _mtime)
  39. {
  40. struct url_entry *url = malloc(sizeof(struct url_entry));
  41. url->url = strdup(_url);
  42. url->mtime = _mtime;
  43. return url;
  44. }
  45. void
  46. url_entry_free(struct url_entry *_url)
  47. {
  48. if (_url) {
  49. free(_url->url);
  50. free(_url);
  51. }
  52. }
  53. struct lang_entry *
  54. lang_entry_new(const char *_lang)
  55. {
  56. struct lang_entry *lang = malloc(sizeof(struct lang_entry));
  57. lang->lang = strdup(_lang);
  58. TAILQ_INIT(&lang->pages);
  59. lang->dir = NULL;
  60. return lang;
  61. }
  62. void
  63. lang_entry_free(struct lang_entry *_lang)
  64. {
  65. if (_lang) {
  66. struct url_entry *url;
  67. while ((url = TAILQ_FIRST(&_lang->pages))) {
  68. TAILQ_REMOVE(&_lang->pages, url, entries);
  69. url_entry_free(url);
  70. }
  71. dir_list_free(_lang->dir);
  72. free(_lang->lang);
  73. }
  74. }
  75. void
  76. sitemap_free(struct sitemap *_sitemap)
  77. {
  78. if (_sitemap) {
  79. struct lang_entry *lang;
  80. while ((lang = TAILQ_FIRST(&_sitemap->languages))) {
  81. TAILQ_REMOVE(&_sitemap->languages, lang, entries);
  82. lang_entry_free(lang);
  83. }
  84. free(_sitemap->hostname);
  85. dir_list_free(_sitemap->dir);
  86. free(_sitemap);
  87. }
  88. }
  89. struct url_entry *
  90. read_pages_dir(const char *_content_dir, const char *_lang, const char *_page,
  91. const char *_hostname)
  92. {
  93. char *path;
  94. if (asprintf(&path, "%s/%s/%s", _content_dir, _lang, _page) == -1)
  95. err(1, NULL);
  96. struct dir_list *dir = get_dir_entries(path);
  97. if (! dir)
  98. err(1, NULL);
  99. char *url_string = format_url(_hostname, _lang, _page,
  100. dir_entry_exists("SSL", dir));
  101. struct url_entry *url = url_entry_new(url_string, dir->newest);
  102. url->dir = dir;
  103. free(path);
  104. return url;
  105. }
  106. struct lang_entry *
  107. read_language_dir(const char *_content_dir, const char *_lang,
  108. const char *_hostname)
  109. {
  110. char *lang_path = concat_path(_content_dir, _lang);
  111. struct dir_list *dir = get_dir_entries(lang_path);
  112. free(lang_path);
  113. if (! dir)
  114. err(1, NULL);
  115. struct lang_entry *l = lang_entry_new(_lang);
  116. if (!l)
  117. err(1, NULL);
  118. l->dir = dir;
  119. struct dir_entry *entry;
  120. TAILQ_FOREACH(entry, &dir->entries, entries) {
  121. if ((entry->sb.st_mode & S_IFDIR) == 0)
  122. continue;
  123. struct url_entry *url = read_pages_dir(_content_dir, _lang,
  124. entry->filename, _hostname);
  125. TAILQ_INSERT_TAIL(&l->pages, url, entries);
  126. }
  127. return l;
  128. }
  129. struct sitemap *
  130. sitemap_new(const char *_content_dir, const char *_hostname)
  131. {
  132. struct sitemap *sitemap = malloc(sizeof(struct sitemap));
  133. TAILQ_INIT(&sitemap->languages);
  134. sitemap->hostname = strdup(_hostname);
  135. sitemap->dir = get_dir_entries(_content_dir);
  136. if (sitemap->dir == NULL) {
  137. warn(NULL);
  138. goto bailout;
  139. }
  140. struct dir_entry *file;
  141. TAILQ_FOREACH(file, &sitemap->dir->entries, entries) {
  142. if ((file->sb.st_mode & S_IFDIR) == 0)
  143. continue;
  144. struct lang_entry *l = read_language_dir(_content_dir,
  145. file->filename, _hostname);
  146. TAILQ_INSERT_TAIL(&sitemap->languages, l, entries);
  147. }
  148. return sitemap;
  149. bailout:
  150. free(sitemap->hostname);
  151. sitemap_free(sitemap);
  152. return NULL;
  153. }
  154. struct buffer_list *
  155. create_xml_buffers(struct sitemap *s)
  156. {
  157. struct buffer_list *xml = buffer_list_new();
  158. buffer_list_add_string(xml,
  159. "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  160. "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" "
  161. "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
  162. "xsi:schemaLocation="
  163. "\"http://www.sitemaps.org/schemas/sitemap/0.9\n"
  164. "http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\">\n");
  165. struct lang_entry *lang;
  166. TAILQ_FOREACH(lang, &s->languages, entries) {
  167. struct url_entry *url;
  168. TAILQ_FOREACH(url, &lang->pages, entries) {
  169. buffer_list_add_string(xml, "<url><loc>");
  170. buffer_list_add_string(xml, url->url);
  171. buffer_list_add_string(xml, "</loc><lastmod>");
  172. char *isots = format_time(url->mtime);
  173. buffer_list_add_string(xml, isots);
  174. free(isots);
  175. buffer_list_add_string(xml, "</lastmod></url>");
  176. }
  177. }
  178. buffer_list_add_string(xml, "</urlset>");
  179. return xml;
  180. }
  /*
   * Produce the sitemap XML as a single string.  The intermediate buffer
   * list is released before returning; the result comes straight from
   * buffer_list_concat_string() (presumably heap-allocated and owned by
   * the caller -- confirm against buffer.h).
   */
  char *
  sitemap_toxml(struct sitemap *_s)
  {
      struct buffer_list *buffers;
      char *text;

      buffers = create_xml_buffers(_s);
      text = buffer_list_concat_string(buffers);
      buffer_list_free(buffers);

      return text;
  }
  189. char *
  190. sitemap_toxmlgz(struct sitemap *_s, size_t *_size, const char *_filename,
  191. uint32_t _mtime)
  192. {
  193. struct buffer_list *xml = create_xml_buffers(_s);
  194. struct buffer_list *gz = buffer_list_gzip(xml, _filename, _mtime);
  195. char *result = buffer_list_concat(gz);
  196. *_size = gz->size;
  197. buffer_list_free(gz);
  198. buffer_list_free(xml);
  199. return result;
  200. }
  /*
   * Join two path components with a single '/' into a newly allocated
   * string that the caller must free().  Exits via err(3) if the
   * allocation fails.
   */
  char *
  concat_path(const char *_path1, const char *_path2)
  {
      char *joined;
      int rc;

      rc = asprintf(&joined, "%s/%s", _path1, _path2);
      if (rc == -1)
          err(1, NULL);

      return joined;
  }
  /*
   * Format _datetime as a UTC timestamp of the form
   * "YYYY-MM-DDThh:mm:ssZ" for use in a <lastmod> element.
   *
   * Returns a newly allocated string that the caller must free().
   * Exits via err(3) if the time cannot be converted or memory is
   * exhausted.
   */
  char *
  format_time(time_t _datetime)
  {
      char *result;
      struct tm *tm;

      if ((tm = gmtime(&_datetime)) == NULL)
          err(1, NULL);
      /*
       * struct tm counts years from 1900 and months from 0 (January),
       * so both need an offset to become calendar values.
       */
      if (asprintf(&result, "%04d-%02d-%02dT%02d:%02d:%02dZ",
          tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
          tm->tm_hour, tm->tm_min, tm->tm_sec) == -1)
          err(1, NULL);
      return result;
  }
  222. char *
  223. format_url(const char *_hostname, const char *_lang, const char *_page,
  224. bool ssl)
  225. {
  226. const char *http = (ssl) ? "https" : "http";
  227. char *url;
  228. if (asprintf(&url, "%s://%s%s%s/%s.html", http, _hostname,
  229. CMS_ROOT_URL, _lang, _page) == -1)
  230. err(1, NULL);
  231. return url;
  232. }
  233. uint32_t
  234. sitemap_newest(struct sitemap *_sitemap, const char *_lang)
  235. {
  236. uint32_t result = 0;
  237. struct lang_entry *lang;
  238. TAILQ_FOREACH(lang, &_sitemap->languages, entries) {
  239. if (!_lang || (strcmp(_lang, lang->lang) == 0)) {
  240. if (lang->dir->newest > result)
  241. result = lang->dir->newest;
  242. struct url_entry *url;
  243. TAILQ_FOREACH(url, &lang->pages, entries) {
  244. if (url->dir->newest > result)
  245. result = url->dir->newest;
  246. }
  247. }
  248. }
  249. return result;
  250. }