about summary refs log tree commit diff stats
path: root/cgit.c
diff options
context:
space:
mode:
author Lars Hjemli 2008-04-28 11:32:42 +0200
committer Lars Hjemli 2008-04-28 11:32:42 +0200
commit 939d32fda70ea66c9db51687beb3cea6da7b0599 (patch)
tree 50915facf89b78e3856fe6b0564a26c3678c01ba /cgit.c
parent Fix commitdiff annoyance (diff)
download cgit-939d32fda70ea66c9db51687beb3cea6da7b0599.tar.gz
cgit-939d32fda70ea66c9db51687beb3cea6da7b0599.zip
Redesign the caching layer
The original caching layer in cgit has no upper bound on the number of
concurrent cache entries, so when cgit is traversed by a spider (like the
googlebot), the cache might end up filling your disk. Also, if any error
occurs in the cache layer, no content is returned to the client.

This patch redesigns the caching layer to avoid these flaws by
* giving the cache a bounded number of slots
* disabling the cache for the current request when errors occur

The cache size limit is implemented by hashing the querystring (the cache
lookup key) and generating a cache filename based on this hash modulo the
cache size. In order to detect hash collisions, the full lookup key (i.e.
the querystring) is stored in the cache file (separated from its associated
content by ascii 0).

The cache filename is the reversed 8-digit hexadecimal representation of

  hash(key) % cache_size

which should make the filesystem lookup pretty fast (if directory content
is indexed/sorted); reversing the representation avoids the problem where
all keys share the same prefix.

There is a new config option, cache-size, which sets the upper bound on the
number of cache slots. The default value for this option is 0, which has the
same effect as setting nocache=1 (hence nocache is now deprecated).

Included in this patch is also a new testfile which verifies that the
new option works as intended.

Signed-off-by: Lars Hjemli <hjemli@gmail.com>
Diffstat (limited to 'cgit.c')
-rw-r--r-- cgit.c 166
1 files changed, 37 insertions, 129 deletions
diff --git a/cgit.c b/cgit.c
index 38b0ba5..4dc8eec 100644
--- a/cgit.c
+++ b/cgit.c
@@ -45,6 +45,8 @@ void config_cb(const char *name, const char *value)
45 ctx.cfg.enable_log_filecount = atoi(value); 45 ctx.cfg.enable_log_filecount = atoi(value);
46 else if (!strcmp(name, "enable-log-linecount")) 46 else if (!strcmp(name, "enable-log-linecount"))
47 ctx.cfg.enable_log_linecount = atoi(value); 47 ctx.cfg.enable_log_linecount = atoi(value);
48 else if (!strcmp(name, "cache-size"))
49 ctx.cfg.cache_size = atoi(value);
48 else if (!strcmp(name, "cache-root")) 50 else if (!strcmp(name, "cache-root"))
49 ctx.cfg.cache_root = xstrdup(value); 51 ctx.cfg.cache_root = xstrdup(value);
50 else if (!strcmp(name, "cache-root-ttl")) 52 else if (!strcmp(name, "cache-root-ttl"))
@@ -143,6 +145,8 @@ static void prepare_context(struct cgit_context *ctx)
143{ 145{
144 memset(ctx, 0, sizeof(ctx)); 146 memset(ctx, 0, sizeof(ctx));
145 ctx->cfg.agefile = "info/web/last-modified"; 147 ctx->cfg.agefile = "info/web/last-modified";
148 ctx->cfg.nocache = 0;
149 ctx->cfg.cache_size = 0;
146 ctx->cfg.cache_dynamic_ttl = 5; 150 ctx->cfg.cache_dynamic_ttl = 5;
147 ctx->cfg.cache_max_create_time = 5; 151 ctx->cfg.cache_max_create_time = 5;
148 ctx->cfg.cache_repo_ttl = 5; 152 ctx->cfg.cache_repo_ttl = 5;
@@ -163,47 +167,8 @@ static void prepare_context(struct cgit_context *ctx)
163 ctx->page.mimetype = "text/html"; 167 ctx->page.mimetype = "text/html";
164 ctx->page.charset = PAGE_ENCODING; 168 ctx->page.charset = PAGE_ENCODING;
165 ctx->page.filename = NULL; 169 ctx->page.filename = NULL;
166} 170 ctx->page.modified = time(NULL);
167 171 ctx->page.expires = ctx->page.modified;
168static int cgit_prepare_cache(struct cacheitem *item)
169{
170 if (!ctx.repo && ctx.qry.repo) {
171 ctx.page.title = fmt("%s - %s", ctx.cfg.root_title,
172 "Bad request");
173 cgit_print_http_headers(&ctx);
174 cgit_print_docstart(&ctx);
175 cgit_print_pageheader(&ctx);
176 cgit_print_error(fmt("Unknown repo: %s", ctx.qry.repo));
177 cgit_print_docend();
178 return 0;
179 }
180
181 if (!ctx.repo) {
182 item->name = xstrdup(fmt("%s/index.%s.html",
183 ctx.cfg.cache_root,
184 cache_safe_filename(ctx.qry.raw)));
185 item->ttl = ctx.cfg.cache_root_ttl;
186 return 1;
187 }
188
189 if (!ctx.qry.page) {
190 item->name = xstrdup(fmt("%s/%s/index.%s.html", ctx.cfg.cache_root,
191 cache_safe_filename(ctx.repo->url),
192 cache_safe_filename(ctx.qry.raw)));
193 item->ttl = ctx.cfg.cache_repo_ttl;
194 } else {
195 item->name = xstrdup(fmt("%s/%s/%s/%s.html", ctx.cfg.cache_root,
196 cache_safe_filename(ctx.repo->url),
197 ctx.qry.page,
198 cache_safe_filename(ctx.qry.raw)));
199 if (ctx.qry.has_symref)
200 item->ttl = ctx.cfg.cache_dynamic_ttl;
201 else if (ctx.qry.has_sha1)
202 item->ttl = ctx.cfg.cache_static_ttl;
203 else
204 item->ttl = ctx.cfg.cache_repo_ttl;
205 }
206 return 1;
207} 172}
208 173
209struct refmatch { 174struct refmatch {
@@ -288,8 +253,9 @@ static int prepare_repo_cmd(struct cgit_context *ctx)
288 return 0; 253 return 0;
289} 254}
290 255
291static void process_request(struct cgit_context *ctx) 256static void process_request(void *cbdata)
292{ 257{
258 struct cgit_context *ctx = cbdata;
293 struct cgit_cmd *cmd; 259 struct cgit_cmd *cmd;
294 260
295 cmd = cgit_get_cmd(ctx); 261 cmd = cgit_get_cmd(ctx);
@@ -319,82 +285,6 @@ static void process_request(struct cgit_context *ctx)
319 cgit_print_docend(); 285 cgit_print_docend();
320} 286}
321 287
322static long ttl_seconds(long ttl)
323{
324 if (ttl<0)
325 return 60 * 60 * 24 * 365;
326 else
327 return ttl * 60;
328}
329
330static void cgit_fill_cache(struct cacheitem *item, int use_cache)
331{
332 int stdout2;
333
334 if (use_cache) {
335 stdout2 = chk_positive(dup(STDOUT_FILENO),
336 "Preserving STDOUT");
337 chk_zero(close(STDOUT_FILENO), "Closing STDOUT");
338 chk_positive(dup2(item->fd, STDOUT_FILENO), "Dup2(cachefile)");
339 }
340
341 ctx.page.modified = time(NULL);
342 ctx.page.expires = ctx.page.modified + ttl_seconds(item->ttl);
343 process_request(&ctx);
344
345 if (use_cache) {
346 chk_zero(close(STDOUT_FILENO), "Close redirected STDOUT");
347 chk_positive(dup2(stdout2, STDOUT_FILENO),
348 "Restoring original STDOUT");
349 chk_zero(close(stdout2), "Closing temporary STDOUT");
350 }
351}
352
353static void cgit_check_cache(struct cacheitem *item)
354{
355 int i = 0;
356
357 top:
358 if (++i > ctx.cfg.max_lock_attempts) {
359 die("cgit_refresh_cache: unable to lock %s: %s",
360 item->name, strerror(errno));
361 }
362 if (!cache_exist(item)) {
363 if (!cache_lock(item)) {
364 sleep(1);
365 goto top;
366 }
367 if (!cache_exist(item)) {
368 cgit_fill_cache(item, 1);
369 cache_unlock(item);
370 } else {
371 cache_cancel_lock(item);
372 }
373 } else if (cache_expired(item) && cache_lock(item)) {
374 if (cache_expired(item)) {
375 cgit_fill_cache(item, 1);
376 cache_unlock(item);
377 } else {
378 cache_cancel_lock(item);
379 }
380 }
381}
382
383static void cgit_print_cache(struct cacheitem *item)
384{
385 static char buf[4096];
386 ssize_t i;
387
388 int fd = open(item->name, O_RDONLY);
389 if (fd<0)
390 die("Unable to open cached file %s", item->name);
391
392 while((i=read(fd, buf, sizeof(buf))) > 0)
393 write(STDOUT_FILENO, buf, i);
394
395 close(fd);
396}
397
398static void cgit_parse_args(int argc, const char **argv) 288static void cgit_parse_args(int argc, const char **argv)
399{ 289{
400 int i; 290 int i;
@@ -429,13 +319,29 @@ static void cgit_parse_args(int argc, const char **argv)
429 } 319 }
430} 320}
431 321
322static int calc_ttl()
323{
324 if (!ctx.repo)
325 return ctx.cfg.cache_root_ttl;
326
327 if (!ctx.qry.page)
328 return ctx.cfg.cache_repo_ttl;
329
330 if (ctx.qry.has_symref)
331 return ctx.cfg.cache_dynamic_ttl;
332
333 if (ctx.qry.has_sha1)
334 return ctx.cfg.cache_static_ttl;
335
336 return ctx.cfg.cache_repo_ttl;
337}
338
432int main(int argc, const char **argv) 339int main(int argc, const char **argv)
433{ 340{
434 struct cacheitem item;
435 const char *cgit_config_env = getenv("CGIT_CONFIG"); 341 const char *cgit_config_env = getenv("CGIT_CONFIG");
342 int err, ttl;
436 343
437 prepare_context(&ctx); 344 prepare_context(&ctx);
438 item.st.st_mtime = time(NULL);
439 cgit_repolist.length = 0; 345 cgit_repolist.length = 0;
440 cgit_repolist.count = 0; 346 cgit_repolist.count = 0;
441 cgit_repolist.repos = NULL; 347 cgit_repolist.repos = NULL;
@@ -449,13 +355,15 @@ int main(int argc, const char **argv)
449 ctx.qry.raw = xstrdup(getenv("QUERY_STRING")); 355 ctx.qry.raw = xstrdup(getenv("QUERY_STRING"));
450 cgit_parse_args(argc, argv); 356 cgit_parse_args(argc, argv);
451 http_parse_querystring(ctx.qry.raw, querystring_cb); 357 http_parse_querystring(ctx.qry.raw, querystring_cb);
452 if (!cgit_prepare_cache(&item)) 358
453 return 0; 359 ttl = calc_ttl();
454 if (ctx.cfg.nocache) { 360 ctx.page.expires += ttl*60;
455 cgit_fill_cache(&item, 0); 361 if (ctx.cfg.nocache)
456 } else { 362 ctx.cfg.cache_size = 0;
457 cgit_check_cache(&item); 363 err = cache_process(ctx.cfg.cache_size, ctx.cfg.cache_root,
458 cgit_print_cache(&item); 364 ctx.qry.raw, ttl, process_request, &ctx);
459 } 365 if (err)
460 return 0; 366 cache_log("[cgit] error %d - %s\n",
367 err, strerror(err));
368 return err;
461} 369}