diff --git a/baked.c b/baked.c index 8e1b625..974c0a8 100644 --- a/baked.c +++ b/baked.c @@ -20,10 +20,13 @@ INCBIN(favicon_ico, "./etc/favicon.ico"); INCBIN(hotreload_js, "./etc/hotreload.js"); #endif INCBIN(me_jpg, "./etc/me.jpg"); +INCBIN(tmoa_engine_jpg, "./etc/tmoa-engine.jpg"); +INCBIN(tmoa_garbage_jpg, "./etc/tmoa-garbage.jpg"); INCBIN(blog_welcome_md, "./blog/welcome.md"); INCBIN(blog_weird_page_md, "./blog/weird-page.md"); INCBIN(blog_curious_case_of_gebs_md, "./blog/curious-case-of-gebs.md"); +INCBIN(blog_the_making_of_aboba_md, "./blog/the-making-of-aboba.md"); locked(Baked_Resource *) baked_resources = locked_init(nil); @@ -62,9 +65,12 @@ void init_baked_resources(void) add_baked_resource("hotreload.js", hotreload_js_data, hotreload_js_size); #endif add_baked_resource("me.jpg", me_jpg_data, me_jpg_size); + add_baked_resource("tmoa-engine.jpg", tmoa_engine_jpg_data, tmoa_engine_jpg_size); + add_baked_resource("tmoa-garbage.jpg", tmoa_garbage_jpg_data, tmoa_garbage_jpg_size); add_baked_resource("blog-welcome.md", blog_welcome_md_data, blog_welcome_md_size); add_baked_resource("blog-weird-page.md", blog_weird_page_md_data, blog_weird_page_md_size); add_baked_resource("blog-curious-case-of-gebs.md", blog_curious_case_of_gebs_md_data, blog_curious_case_of_gebs_md_size); + add_baked_resource("blog-the-making-of-aboba.md", blog_the_making_of_aboba_md_data, blog_the_making_of_aboba_md_size); unlockx(&baked_resources); } diff --git a/baked.h b/baked.h index 8ef3b98..87f4bd3 100644 --- a/baked.h +++ b/baked.h @@ -16,10 +16,13 @@ INCBIN_EXTERN(favicon_ico); INCBIN_EXTERN(hotreload_js); #endif INCBIN_EXTERN(me_jpg); +INCBIN_EXTERN(tmoa_engine_jpg); +INCBIN_EXTERN(tmoa_garbage_jpg); INCBIN_EXTERN(blog_welcome_md); INCBIN_EXTERN(blog_weird_page_md); INCBIN_EXTERN(blog_curious_case_of_gebs_md); +INCBIN_EXTERN(blog_the_making_of_aboba_md); typedef struct { char *key; // path diff --git a/blog/the-making-of-aboba.md b/blog/the-making-of-aboba.md new file mode 100644 
index 0000000..c7b2e05 --- /dev/null +++ b/blog/the-making-of-aboba.md @@ -0,0 +1,408 @@ +# The making of aboba (this website) + +In this article I'd like to present to you the internals of this website, +how the code is architected and some cool tricks that are used throughout the project. + +## Our "engine" + +![the engine](/etc/tmoa-engine.jpg) + +This image is a joke, obviously. + +The "engine" here is a web server that we're going to be using. I've decided to pick mongoose (-> [their page](https://mongoose.ws)), +partly because I didn't know about/couldn't find other solutions, but I'm really happy with my pick. The only "downside" here is that +mongoose is not http-specific, but also has websockets, MQTT, SNTP and even RPC. While that's really cool, I only need http and not +much else. I haven't dived deeper into mongoose, but it'd be cool if they provided some \`#ifdef\`s to just disable these protocols +(i.e. strip down the code that implements them). That way I could make mongoose even more lightweight and only use the features that I need. + +Here's roughly how we work with mongoose. Refer to their documentation for more context. + +Let's start with \`main()\` +\`\`\` +volatile bool alive = true; + +void graceful_shutdown(int no) { alive = false; } + +int main(int argc, char ** argv) +{ + signal(SIGINT, &graceful_shutdown); + + // skip BS + + mg_log_set(MG_LL_DEBUG); + struct mg_mgr mgr; + mg_mgr_init(&mgr); + + // skip BS + + mg_wakeup_init(&mgr); // We need this for multithreading + mg_http_listen(&mgr, CONFIG_LISTEN_URL, &event_handler, NULL); + + while (alive) { + mg_mgr_poll(&mgr, 1000); + // skip BS + } + + mg_mgr_free(&mgr); + + // skip BS + + return 0; +} +\`\`\` + +As you can see it's quite simple to set up mongoose. Here's what the used functions do: +- \`mg_log_set()\` - set the log level. \`MG_LL_DEBUG\` is very verbose, but it's good for +when the application breaks and we have no clue why. 
+- \`struct mg_mgr\` & \`mg_mgr_init()\` - this is the mongoose "manager". The detailed explanation +can be found [here](https://mongoose.ws/documentation/#2-minute-integration-guide), but it can be +essentially boiled down to "overall state of the web server". +- \`mg_wakeup_init()\` - this is needed to make our application multithreaded. In the docs it says +that it's used to "initialize the *wakeup scheme*". This basically means that we can now talk between +multiple threads using \`mg_wakeup()\`, which is the only thread-safe function provided by mongoose. +- \`mg_mgr_poll()\` - handle the next connection if there's any incoming data to work with. We can +also specify the timeout for a connection. Here we provide 1 second (1000 ms). + +That's all you really need to know to get started with mongoose. Let's get to the \`event_handler()\` now. + +\`\`\` +void event_handler(struct mg_connection *conn, int ev, void *ev_data) +{ + if (ev == MG_EV_HTTP_MSG) { + // Run handler in a new thread + } else if (ev == MG_EV_WAKEUP) { + // We've woken up from a handler by mg_wakeup(). Send the reply back to the client + } +} +\`\`\` + +I've removed a lot of code here, because it's irrelevant at the current point. This allows us to look at +the simplified image of the \`event_handler()\` function. + +Let's stop to talk about the parameters for a second. + +- \`struct mg_connection *conn\` - the structure that describes the incoming connection. We will also +use it to send back our reply. +- \`int ev\` - this is the event enumeration. Basically tells us what event we're currently handling +inside of mongoose's event loop. +- \`void *ev_data\` - additional event data. The value of this parameter differs based on the value of \`int ev\`. +More on that a little bit later. + +What goes on inside the \`MG_EV_HTTP_MSG\` branch? 
+ +\`\`\` +if (ev == MG_EV_HTTP_MSG) { + struct mg_http_message *msg = (struct mg_http_message *)ev_data; + + Route_Thread_Data *data = calloc(1, sizeof(*data)); + data->message = mg_strdup(msg->message); + data->conn_id = conn->id; + data->mgr = conn->mgr; + run_in_thread(&route_thread_function, data); +\`\`\` + +If we have an "HTTP Message" event incoming, ev_data is a pointer to \`struct mg_http_message\`. +This structure contains things like the message body, query parameters, the uri, the method and so on. Here we +duplicate the \`message\` field, which encompasses the entire HTTP message. We also save the reference to the +mongoose manager for later use when we will want to wake up from a thread. + +Here's how the thread is spawned. This code is taken from the mongoose tutorial: https://github.com/cesanta/mongoose/blob/master/tutorials/core/multi-threaded/main.c#L11 + +\`\`\` +void run_in_thread(void *(*f)(void *), void *p) +{ + pthread_t tid = 0; + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + pthread_create(&tid, &attr, f, p); + pthread_attr_destroy(&attr); +} +\`\`\` + +And then here's what goes on inside a thread: + +\`\`\` +void *route_thread_function(void *param) +{ + Route_Thread_Data *data = (Route_Thread_Data *)param; + + struct mg_http_message http_msg = {0}; + int r = mg_http_parse(data->message.buf, data->message.len, &http_msg); + if (r <= 0) { + // Unparsable HTTP request + } + + if (mg_match(http_msg.uri, mg_str("/etc/*"), nil)) { + // Request for static resource + } + + // Request for a dynamic page +} +\`\`\` + +This is quite simple, so there's not much to explain here. One cool thing I'd like to mention is the +\`mg_match()\` function. Normally I'd have to implement uri string matching myself, but I'm glad that +this functionality comes with mongoose built-in. God bless you mongoose! + +## Dynamic pages and static assets. 
+ +![the assets](/etc/tmoa-garbage.jpg) + +Let's stop here now and talk about something different entirely (don't worry, we'll come back later). +I'd like to show you how assets/pages are implemented in aboba, so we get the whole picture. + +In most applications you typically have a distribution model that goes like this: you have your application's +binary ("aboba" or "aboba.exe" or "aboba.bin"...) and then you have an "assets"/"resources" directory, placed +somewhere within the filesystem. There are now 2 ways to go about this. You can have a fixed path to the assets +directory or let the user configure it based on how they've installed the application. In the first case the +app could require that its assets must be located in \`/usr/share/my-app/assets\` and otherwise it will bail +out with an error "Err: Could not find assets directory blah blah" or something. In the other case the application +would require that the user configures the path to the assets directory themselves, which is not a bad solution +(it may sound like it at first). This really depends on what audience we are targeting. "Are our users tech-savvy +enough to do it themselves?" is the question we'd have to ask ourselves. + +**Enter embedded assets** + +The downside of both solutions is that we still have to distribute our application with an assets directory. We +have to manage 1 executable file + an entire directory. The *obvious* solution would be to just create a +windows-style setup wizard or a \`make install\` or some other install script thingy. This problem is easily +solvable by literally compiling the bytes of the assets into our program. That way we can distribute only the +executable since it's self-contained and doesn't need to reach out to the filesystem at runtime to get its resources. 
+This technique is fairly old and the most notable uses I can think of are: [XPM Format](https://en.wikipedia.org/wiki/X_PixMap), +[Windows resources](https://en.wikipedia.org/wiki/Resource_(Windows)) and raylib's [rres](https://github.com/raysan5/rres). +Even C23 has acknowledged the significance of embedded assets (-> [article](https://thephd.dev/finally-embed-in-c23)). + +Of course there are some pitfalls of this approach. What if the files we're trying to embed are too *thicc*? We obviously +don't want to end up with a 5GB executable. Imagine the OS trying to load that program into RAM. That would just eat up 5 +gigs before anything meaningful ever happens. This pattern of asset distribution is the most suitable for embedded applications +where there's no underlying filesystem to work with, games which are not too large, GUI apps that need some icons and fonts. + +So how does aboba go about embedding its assets? [incbin](https://github.com/graphitemaster/incbin) comes to the rescue! 
+ +Here are external declarations for our resources: +\`\`\` +#include "incbin/incbin.h" + +INCBIN_EXTERN(gpp1); + +INCBIN_EXTERN(home_html); +INCBIN_EXTERN(page_missing_html); +INCBIN_EXTERN(template_blog_html); +INCBIN_EXTERN(blog_html); + +INCBIN_EXTERN(simple_css); +INCBIN_EXTERN(favicon_ico); +#if MY_DEBUG +INCBIN_EXTERN(hotreload_js); +#endif +INCBIN_EXTERN(me_jpg); +INCBIN_EXTERN(tmoa_engine_jpg); + +INCBIN_EXTERN(blog_welcome_md); +INCBIN_EXTERN(blog_weird_page_md); +INCBIN_EXTERN(blog_curious_case_of_gebs_md); +INCBIN_EXTERN(blog_the_making_of_aboba_md); +\`\`\` + +And here's where the actual inclusion happens: + +\`\`\` +INCBIN(gpp1, "./gpp1"); + +INCBIN(home_html, "./tmpls/home.html"); +INCBIN(page_missing_html, "./tmpls/page-missing.html"); +INCBIN(template_blog_html, "./tmpls/template-blog.html"); +INCBIN(blog_html, "./tmpls/blog.html"); + +INCBIN(simple_css, "./etc/simple.css"); +INCBIN(favicon_ico, "./etc/favicon.ico"); +#if MY_DEBUG +INCBIN(hotreload_js, "./etc/hotreload.js"); +#endif +INCBIN(me_jpg, "./etc/me.jpg"); +INCBIN(tmoa_engine_jpg, "./etc/tmoa-engine.jpg"); +INCBIN(tmoa_garbage_jpg, "./etc/tmoa-garbage.jpg"); + +INCBIN(blog_welcome_md, "./blog/welcome.md"); +INCBIN(blog_weird_page_md, "./blog/weird-page.md"); +INCBIN(blog_curious_case_of_gebs_md, "./blog/curious-case-of-gebs.md"); +INCBIN(blog_the_making_of_aboba_md, "./blog/the-making-of-aboba.md"); +\`\`\` + +As you can see, with incbin embedding assets is extremely easy. In the past I've worked with my +own asset packer utility \`x2h.c\`, but it was kind of sloppy and I don't have the will to +rewrite it (maybe one day ;) ). + +Incbin works (on GCC, which I'm using here) by calling into the assembler using \`__asm__()\` and then using the \`.incbin\` +directive to include the binary file. More on that can be found in the docs: https://sourceware.org/binutils/docs/as/Incbin.html + +Now let the **trickery** begin... + +## Tangent about templating + +"Oh God, another tangent!" I hear you say... 
Don't worry, in this section I'll explain how templating is implemented, +so that we can circle back to our embedded assets system to better understand it. + +How are templating engines implemented in general? You usually have a pseudo-HTML page (e.g. \`my-page.tmpl\`), which +contains plain old HTML, but also has some escaped blocks that can embed dynamic variables, loop over lists, include other pages +and whatnot. You then take that pseudo-page and run it through some kind of a (keyword) *preprocessor*, which given an environment, +can expand variable *macros*, unfold loops and other tricks. + +Do you already see where I'm going with my wording? + +I want to keep this *mostly* a pure C project, where every bit of it that can be written in C, is written in C. Yes, even the templates +(*in a way*). In order to expand the page templates I've decided to use GPP or the General PreProcessor written by Denis Auroux and maintained +by Tristan Miller. Here's a link to GPP's website (-> [click](https://logological.org/gpp)). Why not use regular CPP that comes with the GCC +suite, you may ask. This is because GPP has a special mode - the HTML mode, which makes it so that it's better suited for working with HTML. +For example \`\#define\` vs. \`<\#define>\` or \`\#ifdef\` and \`\#endif\` vs. \`<\#ifdef>\` and \`<\#endif>\`. + +How do we interact with GPP then? To preprocess our templates, we can call GPP via its command line interface (CLI) - set HTML mode, give path +to the template file and collect the output, which we then can send back to the client. + +But wait, if we call out to an external executable, doesn't that mean that we'll have to ship GPP alongside aboba? The answer is yes! Since +we follow the *principle of a single-executable-deployment*, why don't we pack GPP into our binary, just like any other asset? Another question +arises then - how do we call a packed program and how do we work with its CLI? I'll answer this in the next part... 
+ +## Back to the assets + +So now that we've established why and how we embed assets into an executable and the way we work with templates, we can now discuss the +**embedded assets system**. + +Here's the core API of the system: + +\`\`\` +typedef struct { + char *key; // path + int value; // memfd +} Baked_Resource; + +void init_baked_resources(void); +void free_baked_resources(void); +// skip BS +bool get_baked_resource_path(char *key, char *buf, size_t size); +// skip BS +\`\`\` + +The \`Baked_Resource\` struct is defined in such a way that works with \`stb_ds.h\`'s string hashmap. stb_ds.h can be found here: https://nothings.org/stb_ds/. +Let's take a closer look at the fields: +- \`key\` - a key to a file within our hashmap of baked resources +- \`value\` - a memfd associated with the baked resource + +Here's the initialization and deinitialization of baked resources: + +\`\`\` + +void add_baked_resource(char *key, const uchar *data, size_t size) +{ + int fd = memfd_create(key, 0); + if (fd < 0) { + LOGE("Could not create resource %s. 
Aborting...", key); + abort(); + } + write(fd, data, size); + shput(baked_resources.value, key, fd); +} + +void init_baked_resources(void) +{ + lockx(&baked_resources); + add_baked_resource("home.html", home_html_data, home_html_size); + add_baked_resource("page-missing.html", page_missing_html_data, page_missing_html_size); + add_baked_resource("template-blog.html", template_blog_html_data, template_blog_html_size); + add_baked_resource("blog.html", blog_html_data, blog_html_size); + add_baked_resource("gpp1", gpp1_data, gpp1_size); + add_baked_resource("simple.css", simple_css_data, simple_css_size); + add_baked_resource("favicon.ico", favicon_ico_data, favicon_ico_size); +#if MY_DEBUG + add_baked_resource("hotreload.js", hotreload_js_data, hotreload_js_size); +#endif + add_baked_resource("me.jpg", me_jpg_data, me_jpg_size); + add_baked_resource("tmoa-engine.jpg", tmoa_engine_jpg_data, tmoa_engine_jpg_size); + add_baked_resource("tmoa-garbage.jpg", tmoa_garbage_jpg_data, tmoa_garbage_jpg_size); + add_baked_resource("blog-welcome.md", blog_welcome_md_data, blog_welcome_md_size); + add_baked_resource("blog-weird-page.md", blog_weird_page_md_data, blog_weird_page_md_size); + add_baked_resource("blog-curious-case-of-gebs.md", blog_curious_case_of_gebs_md_data, blog_curious_case_of_gebs_md_size); + add_baked_resource("blog-the-making-of-aboba.md", blog_the_making_of_aboba_md_data, blog_the_making_of_aboba_md_size); + unlockx(&baked_resources); +} + +void free_baked_resources(void) +{ + lockx(&baked_resources); + for (size_t i = 0; i < shlen(baked_resources.value); i++) { + close(baked_resources.value[i].value); + } + shfree(baked_resources.value); + unlockx(&baked_resources); +} +\`\`\` + +Here we use memfd API to convert a baked-in file into a file that has a file descriptor associated with it. Why? We do this, because +we have no way of passing files down to GPP's CLI. We only have the raw bytes of a file, which we can't really work with. 
memfds +allow us to create a virtual memory-mapped file and get its file descriptor. Using said file descriptor we can then write our file's +bytes into the virtual file, making it accessible via Linux's VFS. The virtual file can be accessed via a path like \`/proc/<pid>/fd/<fd>\`. +Now that we've successfully converted a baked-in file into a "*pathed*" file, we can then pass the path down to GPP. Heck, we can even run +GPP itself from a memory-mapped file! + +Here's how we get the memory-mapped file's path in aboba: + +\`\`\` +bool get_baked_resource_path(char *key, char *buf, size_t size) +{ + if (shgeti(baked_resources.value, key) != -1) { + int fd = shget(baked_resources.value, key); + snprintf(buf, size, "/proc/%d/fd/%d", getpid(), fd); + unlockx(&baked_resources); + return true; + } + return false; +} +\`\`\` + +And then we run GPP like so: + +\`\`\` +bool gpp_run(char *path, NString_List *env, String_Builder *out) +{ + Cmd cmd = {0}; + defer { cmd_free(&cmd); } + + char gpp1[PATH_MAX]; + if (!get_baked_resource_path("gpp1", gpp1, sizeof(gpp1))) { + return false; + } + + cmd_append(&cmd, gpp1); + cmd_append(&cmd, "-H"); + cmd_append(&cmd, "-x"); + cmd_append(&cmd, "--nostdinc"); + cmd_append(&cmd, path); + + for (size_t i = 0; i < env->count; i++) { + cmd_append(&cmd, env->items[i]); + } + + return cmd_run_collect(&cmd, out) == 0; +} +\`\`\` + +In the logger we can now see commands like this: \`Info: cmd /proc/1210675/fd/8 -H -x --nostdinc /proc/1210675/fd/6 ...\`, where +/proc/1210675/fd/8 is a memory-mapped file for GPP and /proc/1210675/fd/6 is a memory-mapped file for the template. Pretty cool, eh? + +## Left out topics + +I think this is out of scope of this article, so I'm not going to talk about it here, but a big part of this project was making +live hotreloading. I can basically edit the website inside of my editor and it auto-refreshes in the browser, kinda like a vite js +project. 
A video of this can be found here: https://www.reddit.com/r/C_Programming/comments/1lbzjvi/webdev_in_c_pt2_true_live_hotreloading_no_more/ + +## Summary + +During this project I've learned a lot about Linux, the memfd API and webdev in general. Normally I wouldn't pick up a website project +simply because I'm tired of webdev. I've been through a webdev phase and it sucked. Using JavaScript, 300MB of node_modules for a bare +react hello world project, npm installing countless slop libraries and so on. +Then on the backend you have the same amount of C# or Java or TypeScript slop, but now you can call yourself a *backend engineer* or +whatever the hell. Writing this website in C put me on a different view of webdev and made it actually fun to write. + +You can go check out the code for aboba @ http://git.kamkow1lair.pl/kamkow1/aboba.git. + diff --git a/build.c b/build.c index d4d4d90..ae50d69 100644 --- a/build.c +++ b/build.c @@ -40,10 +40,13 @@ int main(int argc, char ** argv) "./etc/simple.css", "./etc/favicon.ico", "./etc/me.jpg", + "./etc/tmoa-engine.jpg", + "./etc/tmoa-garbage.jpg", "./blog/welcome.md", "./blog/weird-page.md", - "./blog/curious-case-of-gebs.md" + "./blog/curious-case-of-gebs.md", + "./blog/the-making-of-aboba.md" ) { RULE("./mongoose.o", "./mongoose/mongoose.c") { diff --git a/etc/tmoa-engine.jpg b/etc/tmoa-engine.jpg new file mode 100644 index 0000000..5541f94 Binary files /dev/null and b/etc/tmoa-engine.jpg differ diff --git a/etc/tmoa-garbage.jpg b/etc/tmoa-garbage.jpg new file mode 100644 index 0000000..66570e9 Binary files /dev/null and b/etc/tmoa-garbage.jpg differ diff --git a/main.c b/main.c index 3e4383a..2de4d5e 100644 --- a/main.c +++ b/main.c @@ -237,8 +237,12 @@ int cp(const char* source, const char* destination) void populate_etc_dump(char *etc_dump) { static char *files[] = { - "favicon.ico", "hotreload.js", - "simple.css", "me.jpg", + "favicon.ico", + "hotreload.js", + "simple.css", + "me.jpg", + "tmoa-engine.jpg", + 
"tmoa-garbage.jpg", }; lock_baked_resources();