sitemap.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. /*
  2. * Copyright (c) 2018 Markus Hennecke <markus-hennecke@markus-hennecke.de>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. #include <sys/types.h>
  17. #include <err.h>
  18. #include <fcntl.h>
  19. #include <limits.h>
  20. #include <stdio.h>
  21. #include <stdlib.h>
  22. #include <string.h>
  23. #include <time.h>
  24. #include <unistd.h>
  25. #include "filehelper.h"
  26. #include "buffer.h"
  27. #include "sitemap.h"
  28. static char *concat_path(const char *, const char *);
  29. static struct lang_entry *read_language_dir(char *_content_dir, char *,
  30. char *);
  31. static struct url_entry *read_pages_dir(char *, char *, char *, char *);
  32. static char *format_time(time_t _datetime);
  33. static char *format_url(char *_hostname, char *_lang,
  34. char *_page, bool ssl);
  35. static struct buffer_list *create_xml_buffers(struct sitemap *);
  36. struct url_entry *
  37. url_entry_new(char *_url, time_t _mtime)
  38. {
  39. struct url_entry *url = malloc(sizeof(struct url_entry));
  40. url->url = strdup(_url);
  41. url->mtime = _mtime;
  42. return url;
  43. }
  44. void
  45. url_entry_free(struct url_entry *_url)
  46. {
  47. if (_url) {
  48. free(_url->url);
  49. free(_url);
  50. }
  51. }
  52. struct lang_entry *
  53. lang_entry_new(char *_lang)
  54. {
  55. struct lang_entry *lang = malloc(sizeof(struct lang_entry));
  56. lang->lang = strdup(_lang);
  57. TAILQ_INIT(&lang->pages);
  58. lang->dir = NULL;
  59. return lang;
  60. }
  61. void
  62. lang_entry_free(struct lang_entry *_lang)
  63. {
  64. if (_lang) {
  65. struct url_entry *url;
  66. while ((url = TAILQ_FIRST(&_lang->pages))) {
  67. TAILQ_REMOVE(&_lang->pages, url, entries);
  68. url_entry_free(url);
  69. }
  70. dir_list_free(_lang->dir);
  71. free(_lang->lang);
  72. }
  73. }
  74. void
  75. sitemap_free(struct sitemap *_sitemap)
  76. {
  77. if (_sitemap) {
  78. struct lang_entry *lang;
  79. while ((lang = TAILQ_FIRST(&_sitemap->languages))) {
  80. TAILQ_REMOVE(&_sitemap->languages, lang, entries);
  81. lang_entry_free(lang);
  82. }
  83. free(_sitemap->hostname);
  84. dir_list_free(_sitemap->dir);
  85. free(_sitemap);
  86. }
  87. }
  88. struct url_entry *
  89. read_pages_dir(char *_content_dir, char *_lang, char *_page, char *_hostname)
  90. {
  91. char *path;
  92. if (asprintf(&path, "%s/%s/%s", _content_dir, _lang, _page) == -1)
  93. err(1, NULL);
  94. struct dir_list *dir = get_dir_entries(path);
  95. if (! dir)
  96. err(1, NULL);
  97. char *url_string = format_url(_hostname, _lang, _page,
  98. dir_entry_exists("SSL", dir));
  99. struct url_entry *url = url_entry_new(url_string, dir->newest);
  100. url->dir = dir;
  101. free(path);
  102. return url;
  103. }
  104. struct lang_entry *
  105. read_language_dir(char *_content_dir, char *_lang, char *_hostname)
  106. {
  107. char *lang_path = concat_path(_content_dir, _lang);
  108. struct dir_list *dir = get_dir_entries(lang_path);
  109. free(lang_path);
  110. if (! dir)
  111. err(1, NULL);
  112. struct lang_entry *l = lang_entry_new(_lang);
  113. if (!l)
  114. err(1, NULL);
  115. l->dir = dir;
  116. struct dir_entry *entry;
  117. TAILQ_FOREACH(entry, &dir->entries, entries) {
  118. if ((entry->sb.st_mode & S_IFDIR) == 0)
  119. continue;
  120. struct url_entry *url = read_pages_dir(_content_dir, _lang,
  121. entry->filename, _hostname);
  122. TAILQ_INSERT_TAIL(&l->pages, url, entries);
  123. }
  124. return l;
  125. }
  126. struct sitemap *
  127. sitemap_new(char *_content_dir, char *_hostname)
  128. {
  129. struct sitemap *sitemap = malloc(sizeof(struct sitemap));
  130. TAILQ_INIT(&sitemap->languages);
  131. sitemap->hostname = strdup(_hostname);
  132. sitemap->dir = get_dir_entries(_content_dir);
  133. if (sitemap->dir == NULL) {
  134. warn(NULL);
  135. goto bailout;
  136. }
  137. struct dir_entry *file;
  138. TAILQ_FOREACH(file, &sitemap->dir->entries, entries) {
  139. if ((file->sb.st_mode & S_IFDIR) == 0)
  140. continue;
  141. struct lang_entry *l = read_language_dir(_content_dir,
  142. file->filename, _hostname);
  143. TAILQ_INSERT_TAIL(&sitemap->languages, l, entries);
  144. }
  145. return sitemap;
  146. bailout:
  147. free(sitemap->hostname);
  148. sitemap_free(sitemap);
  149. return NULL;
  150. }
  151. struct buffer_list *
  152. create_xml_buffers(struct sitemap *s)
  153. {
  154. struct buffer_list *xml = buffer_list_new();
  155. buffer_list_add_string(xml,
  156. "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  157. "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" "
  158. "xmlns:xsi=\"http://www.w3.org/2001/XMSchema-instance\" "
  159. "xsi:schemaLocation="
  160. "\"http://www.sitemaps.org/schemas/sitemap/0.9\n"
  161. "http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\">\n");
  162. struct lang_entry *lang;
  163. TAILQ_FOREACH(lang, &s->languages, entries) {
  164. struct url_entry *url;
  165. TAILQ_FOREACH(url, &lang->pages, entries) {
  166. buffer_list_add_string(xml, "<url><loc>");
  167. buffer_list_add_string(xml, url->url);
  168. buffer_list_add_string(xml, "</loc><lastmod>");
  169. char *isots = format_time(url->mtime);
  170. buffer_list_add_string(xml, isots);
  171. free(isots);
  172. buffer_list_add_string(xml, "</lastmod></url>");
  173. }
  174. }
  175. buffer_list_add_string(xml, "</urlset>");
  176. return xml;
  177. }
  178. char *
  179. sitemap_toxml(struct sitemap *_s)
  180. {
  181. struct buffer_list *xml = create_xml_buffers(_s);
  182. char *xml_string = buffer_list_concat_string(xml);
  183. buffer_list_free(xml);
  184. return xml_string;
  185. }
  186. char *
  187. sitemap_toxmlgz(struct sitemap *_s, size_t *_size, const char *_filename,
  188. uint32_t _mtime)
  189. {
  190. struct buffer_list *xml = create_xml_buffers(_s);
  191. struct buffer_list *gz = buffer_list_gzip(xml, _filename, _mtime);
  192. char *result = buffer_list_concat(gz);
  193. *_size = gz->size;
  194. buffer_list_free(gz);
  195. buffer_list_free(xml);
  196. return result;
  197. }
  198. char *
  199. concat_path(const char *_path1, const char *_path2)
  200. {
  201. char *path;
  202. if (asprintf(&path, "%s/%s", _path1, _path2) == -1)
  203. err(1, NULL);
  204. return path;
  205. }
  206. char *
  207. format_time(time_t _datetime)
  208. {
  209. char *result;
  210. struct tm *tm;
  211. if ((tm = gmtime(&_datetime)) == NULL)
  212. err(1, NULL);
  213. if (asprintf(&result, "%04d-%02d-%02dT%02d:%02d:%02dZ",
  214. tm->tm_year + 1900, tm->tm_mon, tm->tm_mday,
  215. tm->tm_hour, tm->tm_min, tm->tm_sec) == -1)
  216. err(1, NULL);
  217. return result;
  218. }
  219. char *
  220. format_url(char *_hostname, char *_lang, char *_page, bool ssl)
  221. {
  222. char *http = (ssl) ? "https" : "http";
  223. char *url;
  224. if (asprintf(&url, "%s://%s/%s_%s.html", http, _hostname, _page,
  225. _lang) == -1)
  226. err(1, NULL);
  227. return url;
  228. }
  229. uint32_t
  230. sitemap_newest(struct sitemap *_sitemap, const char *_lang)
  231. {
  232. uint32_t result = 0;
  233. struct lang_entry *lang;
  234. TAILQ_FOREACH(lang, &_sitemap->languages, entries) {
  235. if (!_lang || (strcmp(_lang, lang->lang) == 0)) {
  236. if (lang->dir->newest > result)
  237. result = lang->dir->newest;
  238. struct url_entry *url;
  239. TAILQ_FOREACH(url, &lang->pages, entries) {
  240. if (url->dir->newest > result)
  241. result = url->dir->newest;
  242. }
  243. }
  244. }
  245. return result;
  246. }