From 55ebeda0cccb66c033e8466798780ae00582e9fe Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Tue, 6 Sep 2022 12:15:10 +0100 Subject: [PATCH 1/3] build: add cache introduction docs Signed-off-by: Justin Chadwell --- _data/toc.yaml | 2 + build/building/cache.md | 283 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+) create mode 100644 build/building/cache.md diff --git a/_data/toc.yaml b/_data/toc.yaml index bcadb58cfb..f5eb69b215 100644 --- a/_data/toc.yaml +++ b/_data/toc.yaml @@ -1391,6 +1391,8 @@ manuals: section: - path: /build/building/packaging/ title: Packaging your software + - path: /build/building/cache/ + title: Optimizing builds with cache management - sectiontitle: Choosing a build driver section: - path: /build/building/drivers/ diff --git a/build/building/cache.md b/build/building/cache.md new file mode 100644 index 0000000000..3cc1daab67 --- /dev/null +++ b/build/building/cache.md @@ -0,0 +1,283 @@ +--- +title: Optimizing builds with cache management +description: Improve your build speeds by taking advantage of the builtin cache +keywords: build, buildx, buildkit, dockerfile, image layers, build instructions, build context +--- + +It's very unlikely you end up just building a docker image once - most of the +time, you'll want to build it again at some point, whether that's for the next +release of your software, or, more likely, on your local development machine +for testing. Because building images is a frequent operation, docker provides +several tools to speed up your builds for when you inevitably need to run them +again. + +The main approach to improving your build's speed is to take advantage of +docker's build cache. + +## How does the build cache work? 
+ +Docker's build cache is quite simple to understand - first, remember the +instructions that make up your Dockerfile, for example, in this build which +might be used to create a C/C++ program: + +```dockerfile +FROM ubuntu:latest + +RUN apt-get update && apt-get upgrade -y build-essentials +COPY . /src/ +WORKDIR /src/ +RUN make build +``` + +Each instruction in this Dockerfile (roughly) translates into a layer in your +final image. You can think of layers in a stack, with each layer adding more +content to the filesystem on top of the layer before it: + +``` +stack diagram +``` + +Now, if one of the layers changes, somewhere - for example, suppose you make a +change to your C/C++ program in `main.c`. After this change, the `COPY` command +will have to run again, so that the layer changes, so the cache for that layer +has been invalidated. + +``` +stack diagram with COPY layer cache invalidated +``` + +But since we have a change to that file, we now need to run our `make build` +step again, so that those changes are built into our program. So since our +cache for `COPY` was invalidated, we also have to invalidate the cache for all +the layers after it, including our `RUN make build`, so that it will run again: + +``` +stack diagram with COPY + other layer cache invalidated +``` + +That's pretty much all there is to understand the cache - once there's a change +in a layer, then all the layers after it will need to be rebuilt as well (even +if they wouldn't build anything differently, they still need to re-run). + +> **Note** +> +> Suppose you have a `RUN apt-get update && apt-get upgrade -y` step in your +> Dockerfile to upgrade all the software packages in your Debian-based image to +> the latest version. +> +> Unfortunately, this doesn't mean that the images you build are *always* up to +> date! If you built the image a week ago, then the results of your `apt-get` +> will get cached, and re-used if you re-run it now! 
The only way to force a +> re-run is to make sure that a layer before it has changed, for example, by +> making sure you have the latest version of the image used in `FROM`. + +## How can I use the cache efficiently? + +Now that we've seen how the cache works, we can look at how to best take +advantage of the cache to get the best results. While the cache will +automatically work on any docker build that you run, you can often refactor +your Dockerfile to get even better performance and save precious seconds (or +even minutes) off of your builds! + +### Order your layers + +Putting the commands in your Dockerfile into a logical order is a great place +to start. Because a change in an earlier step will rebuild all the later steps, +we want to make sure that we put our most expensive steps near the beginning, +and our most frequently changing steps near the end, to avoid unnecessarily +rebuilding layers that haven't changed much. + +Let's take a simple example, a Dockerfile snippet that runs a javascript build +from the source files in the current directory: + +```dockerfile +FROM node +WORKDIR /app +COPY . . +RUN npm install +RUN npm build +``` + +We can examine why this isn't very efficient. If we update our `package.json` +file, we'll install all of our dependencies and run the build from scratch, as +intended. But, if we update `src/main.js`, then we'll install all of our +dependencies again - even if nothing has changed! + +We can improve this, to only install dependencies the relevant files have +changed: + +```dockerfile +FROM node +WORKDIR /app +COPY package.json yarn.lock . +RUN npm install +COPY . . +RUN npm build +``` + +What we've done is to divide up our `COPY` command to only copy over our +`package.json` and `yarn.lock` before the `npm install` - this means that we'll +only re-run `npm install` if those files change, instead of any of the files +in our local directory! 
+ +### Keep layers small + +One of the easiest things you can do to keep your images building quickly is to +just put less stuff into your build! This keeps your image layers thin and +lean, which means that not only will your cache stay smaller, but there should +be fewer things that could be out-of-date and need rebuilding! + +To get started, here are a few tips and tricks: + +- Don't `COPY` unnecessary files into your build environment! + + Running a command like `COPY . /src` will `COPY` your entire build context + into the image! If you've got logs, package manager artifacts, or even + previous build results in your current directory, those will also be copied + over, which will make your image larger than it needs to be (especially as + those files are usually not helpful)! + + You can avoid copying these files over by `COPY`ing only the files and + directories that you want, for example, you might only just want a `Makefile` + and your `src` directory - if that's all you need, then you can split up your + `COPY` into `COPY ./Makefile /src` and `COPY ./src /src`. If you do want the + entire current directory, but want to ignore the unnecessary files in it, you + can setup your [`.dockerignore` file](https://docs.docker.com/engine/reference/builder/#dockerignore-file), + to make sure that those files won't be copied over! + +- Use your package manager wisely! + + No matter what operating system or programming language you choose to use as + your build's base image, most docker images have some sort of package manager + to help install software into your image. For example, `debian` has `apt`, + `alpine` has `apk`, `python` has `pip`, `node` has `npm`, etc, etc. + + When installing packages be careful! Make sure to only install the packages + that you need - if you're not going to use them, don't install them. Remember + that this might be a different list for your local development environment + and your production environment. 
You can use multi-stage builds (which we'll + cover later) to split these up efficiently. + +- Try using the `RUN` command dedicated cache! + + The `RUN` command supports a specialized cache, which can be used when you + need a more fine-grained cache between runs. For example, when installing + packages, you don't always need to fetch all of your packages from the + internet each time, you only need the ones that have changed! + + To solve this problem, you can use `RUN --mount type=cache`. For example, for + your `debian`-based image you might use the following: + + ```dockerfile + RUN \ + --mount=type=cache,target=/var/cache/apt \ + apt-get update && apt-get install -y git + ``` + + The use of the explicit cache with the `--mount` flag keeps the contents of + the `target` directory preserved between builds - so when this layer needs to + be rebuilt, then it'll be able to use `apt`'s own cache in `/var/cache/apt`. + +### Minimize the number of layers + +Keeping your layers small is a good step to getting quick builds - the logical +next step is to reduce the number of layers that you have! Fewer layers mean +that you have less to rebuild, when something in your Dockerfile changes, so +your build will complete faster! + +Here are some more tips you can use: + +- Use an appropriate base image! + + Docker provides over 170 pre-built [official images](https://hub.docker.com/search?q=&image_filter=official) + for almost every common development scenario! For example, if you're building + a Java web server, then while you could install `java` into any image you + like, it's much quicker (and easier to manage updates) if you use a dedicated + image, for example, [`openjdk`](https://hub.docker.com/_/openjdk/). 
Even if + there's not an official image for what you might want, Docker provides images + from [verified publishers](https://hub.docker.com/search?q=&image_filter=store) + and [open source partners](https://hub.docker.com/search?q=&image_filter=open_source) + that can help you on your way, and the community often produces third-party + images to use as well. + + These pre-built stop you from needing to manually install and manage the + software, which allows you to save valuable build time as well as disk space. + +- Use multi-stage builds to run builds in parallel! + + + + Multi-stage builds let you split up your Dockerfile into multiple distinct + stages, and then provide the tools to combine them all back together again. + The docker builder will work out dependencies between the stages and run them + using the most efficient strategy, even allowing you to run multiple commands at the + same time in this way! + + To use a multi-stage build, you can simply use multiple `FROM` commands. For + example, suppose you want to build a simple web server that serves HTML from + your `docs` directory in Git: + + ```dockerfile + FROM alpine as git + RUN apk add git + + FROM git as fetch + WORKDIR /repo + RUN git clone https://github.com/your/repository.git . + + FROM nginx as site + COPY --from=fetch /repo/docs/ /usr/share/nginx/html + ``` + + This build has 3 stages - `git`, `fetch` and `site`. In this example, we've + used `git` as the base for the `fetch` stage, and also used `COPY`'s `--from` + flag to copy the data from the `docs/` directory into the NGINX server + directory. + + Each stage has only a few instructions, and when possible, docker will run + these stages in parallel. Additionally, only the final instructions in the + `site` stage will end up as layers in our image, so we won't have our entire + `git` history embedded into the final result, which helps keep our images + small and secure. + +- Combine your commands together wherever possible! 
+ + Most commands in your Dockerfile support being joined together, so that they + can do multiple things all at once! For example, it's fairly common to see + `RUN` commands being used like this: + + ```dockerfile + RUN echo "the first command" + RUN echo "the second command" + ``` + + But actually, we can run both of these commands inside a single `RUN`, which + means that they will share the same cache! We can do this by using the `&&` + shell operator to run one command after another: + + ```dockerfile + RUN echo "the first command" && echo "the second command" + # or to split to multiple lines + RUN echo "the first command" && \ + echo "the second command" + ``` + + We can also use [heredocs]() to simplify complex multiline scripts (note the + `set -e` command to exit immediately after any command fails, instead of + continuing): + + ```dockerfile + RUN < + +- [Export your build cache](https://github.com/moby/buildkit#export-cache) From c42962dc875916bd71578749833e9bc72ed06757 Mon Sep 17 00:00:00 2001 From: Justin Chadwell Date: Mon, 12 Sep 2022 10:35:46 +0100 Subject: [PATCH 2/3] build: add svg diagrams to cache docs Also adds the graphviz generation code. 
Signed-off-by: Justin Chadwell --- _scss/_night-mode.scss | 5 ++ build/building/cache.md | 15 ++-- build/images/cache-stack-invalidate-copy.dot | 19 +++++ build/images/cache-stack-invalidate-copy.svg | 73 +++++++++++++++++++ build/images/cache-stack-invalidate-rest.dot | 19 +++++ build/images/cache-stack-invalidate-rest.svg | 74 ++++++++++++++++++++ build/images/cache-stack.dot | 19 +++++ build/images/cache-stack.svg | 72 +++++++++++++++++++ 8 files changed, 286 insertions(+), 10 deletions(-) create mode 100644 build/images/cache-stack-invalidate-copy.dot create mode 100644 build/images/cache-stack-invalidate-copy.svg create mode 100644 build/images/cache-stack-invalidate-rest.dot create mode 100644 build/images/cache-stack-invalidate-rest.svg create mode 100644 build/images/cache-stack.dot create mode 100644 build/images/cache-stack.svg diff --git a/_scss/_night-mode.scss b/_scss/_night-mode.scss index 93bd70cffc..3a26635d62 100755 --- a/_scss/_night-mode.scss +++ b/_scss/_night-mode.scss @@ -180,6 +180,11 @@ body.night { img.white-bg { background-color: white; } + + /* apply to images that support being inverted */ + img.invertible { + filter: invert(100%) hue-rotate(180deg); + } /* accordion */ .panel { diff --git a/build/building/cache.md b/build/building/cache.md index 3cc1daab67..555fe9c075 100644 --- a/build/building/cache.md +++ b/build/building/cache.md @@ -23,7 +23,7 @@ might be used to create a C/C++ program: ```dockerfile FROM ubuntu:latest -RUN apt-get update && apt-get upgrade -y build-essentials +RUN apt-get update && apt-get install -y build-essentials COPY . /src/ WORKDIR /src/ RUN make build @@ -33,27 +33,22 @@ Each instruction in this Dockerfile (roughly) translates into a layer in your final image. 
You can think of layers in a stack, with each layer adding more content to the filesystem on top of the layer before it: -``` -stack diagram -``` +![Image layer diagram showing the above commands chained together one after the other](../images/cache-stack.svg){:.invertible} + Now, if one of the layers changes, somewhere - for example, suppose you make a change to your C/C++ program in `main.c`. After this change, the `COPY` command will have to run again, so that the layer changes, so the cache for that layer has been invalidated. -``` -stack diagram with COPY layer cache invalidated -``` +![Image layer diagram, but now with the link between COPY and WORKDIR marked as invalid](../images/cache-stack-invalidate-copy.svg){:.invertible} But since we have a change to that file, we now need to run our `make build` step again, so that those changes are built into our program. So since our cache for `COPY` was invalidated, we also have to invalidate the cache for all the layers after it, including our `RUN make build`, so that it will run again: -``` -stack diagram with COPY + other layer cache invalidated -``` +![Image layer diagram, but now with all links after COPY marked as invalid](../images/cache-stack-invalidate-rest.svg){:.invertible} That's pretty much all there is to understand the cache - once there's a change in a layer, then all the layers after it will need to be rebuilt as well (even diff --git a/build/images/cache-stack-invalidate-copy.dot b/build/images/cache-stack-invalidate-copy.dot new file mode 100644 index 0000000000..beb4bfd11a --- /dev/null +++ b/build/images/cache-stack-invalidate-copy.dot @@ -0,0 +1,19 @@ +// dot -Tsvg ./cache-invalidate-copy.dot > ./cache-invalidate-copy.svg +digraph { + rankdir="LR"; + nodesep=0.3; + edge[minlen=0]; + bgcolor="#00000000"; + node [ shape=rect, width=5, height=0.4, fontname=monospace, fontsize=10 ]; + + from [ label = <FROM ubuntu:latest> ]; + deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; + copy [ label = <COPY . /src/>, color = "red" ]; + workdir [ label = <WORKDIR /src/> ]; + build [ label = <RUN make build> ]; + + from -> deps; + deps -> copy; + copy -> workdir [ color = "red", label = " ❌ " ]; + workdir -> build; +} diff --git a/build/images/cache-stack-invalidate-copy.svg b/build/images/cache-stack-invalidate-copy.svg new file mode 100644 index 0000000000..cd80c60f21 --- /dev/null +++ b/build/images/cache-stack-invalidate-copy.svg @@ -0,0 +1,73 @@ + + + + + + + + + +from + +FROM +ubuntu:latest + + + +deps + +RUN +apt-get update && \ +apt-get install -y build-essentials + + + +from->deps + + + + + +copy + +COPY +. /src/ + + + +deps->copy + + + + + +workdir + +WORKDIR +/src/ + + + +copy->workdir + + + ❌   + + + +build + +RUN +make build + + + +workdir->build + + + + + diff --git a/build/images/cache-stack-invalidate-rest.dot b/build/images/cache-stack-invalidate-rest.dot new file mode 100644 index 0000000000..f7956776e0 --- /dev/null +++ b/build/images/cache-stack-invalidate-rest.dot @@ -0,0 +1,19 @@ +// dot -Tsvg ./cache-invalidate-rest.dot > ./cache-invalidate-rest.svg +digraph { + rankdir="LR"; + nodesep=0.3; + edge[minlen=0]; + bgcolor="#00000000"; + node [ shape=rect, width=5, height=0.4, fontname=monospace, fontsize=10 ]; + + from [ label = <FROM ubuntu:latest> ]; + deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; + copy [ label = <COPY . /src/>, color = "red" ]; + workdir [ label = <WORKDIR /src/>, color = "red" ]; + build [ label = <RUN make build>, color = "red" ]; + + from -> deps; + deps -> copy; + copy -> workdir [ color = "red", label = " ❌ " ]; + workdir -> build [ color = "red", label = " ❌ " ]; +} diff --git a/build/images/cache-stack-invalidate-rest.svg b/build/images/cache-stack-invalidate-rest.svg new file mode 100644 index 0000000000..fb85cf43d6 --- /dev/null +++ b/build/images/cache-stack-invalidate-rest.svg @@ -0,0 +1,74 @@ + + + + + + + + + +from + +FROM +ubuntu:latest + + + +deps + +RUN +apt-get update && \ +apt-get install -y build-essentials + + + +from->deps + + + + + +copy + +COPY +. /src/ + + + +deps->copy + + + + + +workdir + +WORKDIR +/src/ + + + +copy->workdir + + + ❌   + + + +build + +RUN +make build + + + +workdir->build + + + ❌   + + + diff --git a/build/images/cache-stack.dot b/build/images/cache-stack.dot new file mode 100644 index 0000000000..95c4573435 --- /dev/null +++ b/build/images/cache-stack.dot @@ -0,0 +1,19 @@ +// dot -Tsvg ./cache-stack.dot > ./cache-stack.svg +digraph { + rankdir="LR"; + nodesep=0.3; + edge[minlen=0]; + bgcolor="#00000000"; + node [ shape=rect, width=5, height=0.4, fontname=monospace, fontsize=10 ]; + + from [ label = <FROM ubuntu:latest> ]; + deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; + copy [ label = <COPY . /src/> ]; + workdir [ label = <WORKDIR /src/> ]; + build [ label = <RUN make build> ]; + + from -> deps; + deps -> copy; + copy -> workdir; + workdir -> build; +} diff --git a/build/images/cache-stack.svg b/build/images/cache-stack.svg new file mode 100644 index 0000000000..6afd17e4c9 --- /dev/null +++ b/build/images/cache-stack.svg @@ -0,0 +1,72 @@ + + + + + + + + + +from + +FROM +ubuntu:latest + + + +deps + +RUN +apt-get update && \ +apt-get install -y build-essentials + + + +from->deps + + + + + +copy + +COPY +. /src/ + + + +deps->copy + + + + + +workdir + +WORKDIR +/src/ + + + +copy->workdir + + + + + +build + +RUN +make build + + + +workdir->build + + + + + From fdfcea003504bc4184c26095abd95fd2a45002c2 Mon Sep 17 00:00:00 2001 From: David Karlsson Date: Tue, 20 Sep 2022 15:20:47 +0200 Subject: [PATCH 3/3] build: editorial suggestions for cache docs Co-authored-by: CrazyMax Co-authored-by: Justin Chadwell --- build/building/cache.md | 386 ++++++++++--------- build/images/cache-stack-invalidate-copy.dot | 2 +- build/images/cache-stack-invalidate-copy.svg | 8 +- build/images/cache-stack-invalidate-rest.dot | 2 +- build/images/cache-stack-invalidate-rest.svg | 8 +- build/images/cache-stack.dot | 2 +- build/images/cache-stack.svg | 8 +- 7 files changed, 216 insertions(+), 200 deletions(-) diff --git a/build/building/cache.md b/build/building/cache.md index 555fe9c075..282e375c6c 100644 --- a/build/building/cache.md +++ b/build/building/cache.md @@ -1,58 +1,58 @@ --- title: Optimizing builds with cache management description: Improve your build speeds by taking advantage of the builtin cache -keywords: build, buildx, buildkit, dockerfile, image layers, build instructions, build context +keywords: > + build, buildx, buildkit, dockerfile, image layers, build instructions, build + context --- -It's very unlikely you end up just building a docker image once - most of the -time, you'll want to build it 
again at some point, whether that's for the next -release of your software, or, more likely, on your local development machine -for testing. Because building images is a frequent operation, docker provides -several tools to speed up your builds for when you inevitably need to run them -again. +You will likely find yourself rebuilding the same Docker image over and over +again. Whether it's for the next release of your software, or locally during +development. Because building images is a common task, Docker provides several +tools that speed up builds. -The main approach to improving your build's speed is to take advantage of -docker's build cache. +The most important feature for improving build speeds is Docker's build cache. ## How does the build cache work? -Docker's build cache is quite simple to understand - first, remember the -instructions that make up your Dockerfile, for example, in this build which -might be used to create a C/C++ program: +Understanding Docker's build cache helps you write better Dockerfiles that +result in faster builds. + +Have a look at the following example, which shows a simple Dockerfile for a +program written in C. ```dockerfile FROM ubuntu:latest RUN apt-get update && apt-get install -y build-essentials -COPY . /src/ +COPY main.c /src/ WORKDIR /src/ RUN make build ``` -Each instruction in this Dockerfile (roughly) translates into a layer in your -final image. You can think of layers in a stack, with each layer adding more -content to the filesystem on top of the layer before it: +Each instruction in this Dockerfile translates (roughly) to a layer in your +final image. 
You can think of image layers as a stack, with each layer adding +more content on top of the layers that came before it: ![Image layer diagram showing the above commands chained together one after the other](../images/cache-stack.svg){:.invertible} - -Now, if one of the layers changes, somewhere - for example, suppose you make a -change to your C/C++ program in `main.c`. After this change, the `COPY` command -will have to run again, so that the layer changes, so the cache for that layer -has been invalidated. +Whenever a layer changes, that layer will need to be re-built. For example, +suppose you make a change to your program in the `main.c` file. After this +change, the `COPY` command will have to run again in order for those changes to +appear in the image. In other words, Docker will invalidate the cache for this +layer. ![Image layer diagram, but now with the link between COPY and WORKDIR marked as invalid](../images/cache-stack-invalidate-copy.svg){:.invertible} -But since we have a change to that file, we now need to run our `make build` -step again, so that those changes are built into our program. So since our -cache for `COPY` was invalidated, we also have to invalidate the cache for all -the layers after it, including our `RUN make build`, so that it will run again: +If a layer changes, all other layers that come after it are also affected. When +the layer with the `COPY` command gets invalidated, all layers that follow will +need to run again, too: ![Image layer diagram, but now with all links after COPY marked as invalid](../images/cache-stack-invalidate-rest.svg){:.invertible} -That's pretty much all there is to understand the cache - once there's a change -in a layer, then all the layers after it will need to be rebuilt as well (even -if they wouldn't build anything differently, they still need to re-run). +And that's the Docker build cache in a nutshell. Once a layer changes, then all +downstream layers need to be rebuilt as well. 
Even if they wouldn't build +anything differently, they still need to re-run. > **Note** > @@ -60,214 +60,230 @@ if they wouldn't build anything differently, they still need to re-run). > Dockerfile to upgrade all the software packages in your Debian-based image to > the latest version. > -> Unfortunately, this doesn't mean that the images you build are *always* up to -> date! If you built the image a week ago, then the results of your `apt-get` -> will get cached, and re-used if you re-run it now! The only way to force a -> re-run is to make sure that a layer before it has changed, for example, by -> making sure you have the latest version of the image used in `FROM`. +> This doesn't mean that the images you build are always up to date. Rebuilding +> the image on the same host one week later will still get you the same packages +> as before. The only way to force a rebuild is by making sure that a layer +> before it has changed, or by clearing the build cache using +> [`docker builder prune`](/engine/reference/commandline/builder_build/). ## How can I use the cache efficiently? -Now that we've seen how the cache works, we can look at how to best take -advantage of the cache to get the best results. While the cache will -automatically work on any docker build that you run, you can often refactor -your Dockerfile to get even better performance and save precious seconds (or -even minutes) off of your builds! +Now that you understand how the cache works, you can begin to use the cache to +your advantage. While the cache will automatically work on any `docker build` +that you run, you can often refactor your Dockerfile to get even better +performance. These optimizations can save precious seconds (or even minutes) off +of your builds. ### Order your layers -Putting the commands in your Dockerfile into a logical order is a great place -to start. 
Because a change in an earlier step will rebuild all the later steps, -we want to make sure that we put our most expensive steps near the beginning, -and our most frequently changing steps near the end, to avoid unnecessarily -rebuilding layers that haven't changed much. +Putting the commands in your Dockerfile into a logical order is a great place to +start. Because a change causes a rebuild for steps that follow, try to make +expensive steps appear near the beginning of the Dockerfile. Steps that change +often should appear near the end of the Dockerfile, to avoid triggering rebuilds +of layers that haven't changed. -Let's take a simple example, a Dockerfile snippet that runs a javascript build -from the source files in the current directory: +Consider the following example. A Dockerfile snippet that runs a JavaScript +build from the source files in the current directory: ```dockerfile FROM node WORKDIR /app -COPY . . -RUN npm install -RUN npm build +COPY . . # Copy over all files in the current directory +RUN npm install # Install dependencies +RUN npm build # Run build ``` -We can examine why this isn't very efficient. If we update our `package.json` -file, we'll install all of our dependencies and run the build from scratch, as -intended. But, if we update `src/main.js`, then we'll install all of our -dependencies again - even if nothing has changed! +This Dockerfile is rather inefficient. Updating any file causes a reinstall of +all dependencies every time you build the Docker image &emdash; even if the +dependencies didn't change since last time! -We can improve this, to only install dependencies the relevant files have -changed: +Instead, the `COPY` command can be split in two. First, copy over the package +management files (in this case, `package.json` and `yarn.lock`). Then, install +the dependencies. Finally, copy over the project source code, which is subject +to frequent change. ```dockerfile FROM node WORKDIR /app -COPY package.json yarn.lock . 
-RUN npm install -COPY . . -RUN npm build +COPY package.json yarn.lock . # Copy package management files +RUN npm install # Install dependencies +COPY . . # Copy over project files +RUN npm build # Run build ``` -What we've done is to divide up our `COPY` command to only copy over our -`package.json` and `yarn.lock` before the `npm install` - this means that we'll -only re-run `npm install` if those files change, instead of any of the files -in our local directory! +By installing dependencies in earlier layers of the Dockerfile, there is no need +to rebuild those layers when a project file has changed. ### Keep layers small -One of the easiest things you can do to keep your images building quickly is to -just put less stuff into your build! This keeps your image layers thin and -lean, which means that not only will your cache stay smaller, but there should -be fewer things that could be out-of-date and need rebuilding! +One of the best things you can do to speed up image building is to just put less +stuff into your build. Fewer parts means the cache stay smaller, but also that +there should be fewer things that could be out-of-date and need rebuilding. To get started, here are a few tips and tricks: -- Don't `COPY` unnecessary files into your build environment! +#### Don't include unnecessary files - Running a command like `COPY . /src` will `COPY` your entire build context - into the image! If you've got logs, package manager artifacts, or even - previous build results in your current directory, those will also be copied - over, which will make your image larger than it needs to be (especially as - those files are usually not helpful)! - - You can avoid copying these files over by `COPY`ing only the files and - directories that you want, for example, you might only just want a `Makefile` - and your `src` directory - if that's all you need, then you can split up your - `COPY` into `COPY ./Makefile /src` and `COPY ./src /src`. 
If you do want the - entire current directory, but want to ignore the unnecessary files in it, you - can setup your [`.dockerignore` file](https://docs.docker.com/engine/reference/builder/#dockerignore-file), - to make sure that those files won't be copied over! +Be considerate of what files you add to the image. -- Use your package manager wisely! +Running a command like `COPY . /src` will `COPY` your entire build context into +the image. If you've got logs, package manager artifacts, or even previous build +results in your current directory, those will also be copied over. This could +make your image larger than it needs to be, especially as those files are +usually not useful. - No matter what operating system or programming language you choose to use as - your build's base image, most docker images have some sort of package manager - to help install software into your image. For example, `debian` has `apt`, - `alpine` has `apk`, `python` has `pip`, `node` has `npm`, etc, etc. +Avoid adding unnecessary files to your builds by explicitly stating the files or +directories you intend to copy over. For example, you might only want to add a +`Makefile` and your `src` directory to the image filesystem. In that case, +consider adding this to your Dockerfile: - When installing packages be careful! Make sure to only install the packages - that you need - if you're not going to use them, don't install them. Remember - that this might be a different list for your local development environment - and your production environment. You can use multi-stage builds (which we'll - cover later) to split these up efficiently. +```dockerfile +COPY ./src ./Makefile /src +``` -- Try using the `RUN` command dedicated cache! +As opposed to this: - The `RUN` command supports a specialized cache, which can be used when you - need a more fine-grained cache between runs. 
For example, when installing - packages, you don't always need to fetch all of your packages from the - internet each time, you only need the ones that have changed! +```dockerfile +COPY . /src +``` - To solve this problem, you can use `RUN --mount type=cache`. For example, for - your `debian`-based image you might use the following: +You can also create a +[`.dockerignore` file](https://docs.docker.com/engine/reference/builder/#dockerignore-file), +and use that to specify which files and directories to exclude from the build +context. - ```dockerfile - RUN \ - --mount=type=cache,target=/var/cache/apt \ - apt-get update && apt-get install -y git - ``` +#### Use your package manager wisely - The use of the explicit cache with the `--mount` flag keeps the contents of - the `target` directory preserved between builds - so when this layer needs to - be rebuilt, then it'll be able to use `apt`'s own cache in `/var/cache/apt`. +Most Docker image builds involve using a package manager to help install +software into the image. Debian has `apt`, Alpine has `apk`, Python has `pip`, +NodeJS has `npm`, and so on. + +When installing packages, be considerate. Make sure to only install the packages +that you need. If you're not going to use them, don't install them. Remember +that this might be a different list for your local development environment and +your production environment. You can use multi-stage builds to split these up +efficiently. + +#### Use the dedicated `RUN` cache + +The `RUN` command supports a specialized cache, which you can use when you need +a more fine-grained cache between runs. For example, when installing packages, +you don't always need to fetch all of your packages from the internet each time. +You only need the ones that have changed. + +To solve this problem, you can use `RUN --mount=type=cache`. 
For example, for +your Debian-based image you might use the following: + +```dockerfile +RUN \ + --mount=type=cache,target=/var/cache/apt \ + apt-get update && apt-get install -y git +``` + +Using the explicit cache with the `--mount` flag keeps the contents of the +`target` directory preserved between builds. When this layer needs to be +rebuilt, then it'll use the `apt` cache in `/var/cache/apt`. ### Minimize the number of layers -Keeping your layers small is a good step to getting quick builds - the logical -next step is to reduce the number of layers that you have! Fewer layers mean -that you have less to rebuild, when something in your Dockerfile changes, so -your build will complete faster! +Keeping your layers small is a good first step, and the logical next step is to +reduce the number of layers that you have. Fewer layers mean that you have less +to rebuild, when something in your Dockerfile changes, so your build will +complete faster. -Here are some more tips you can use: +The following sections outline some tips you can use to keep the number of +layers to a minimum. -- Use an appropriate base image! +#### Use an appropriate base image - Docker provides over 170 pre-built [official images](https://hub.docker.com/search?q=&image_filter=official) - for almost every common development scenario! For example, if you're building - a Java web server, then while you could install `java` into any image you - like, it's much quicker (and easier to manage updates) if you use a dedicated - image, for example, [`openjdk`](https://hub.docker.com/_/openjdk/). Even if - there's not an official image for what you might want, Docker provides images - from [verified publishers](https://hub.docker.com/search?q=&image_filter=store) - and [open source partners](https://hub.docker.com/search?q=&image_filter=open_source) - that can help you on your way, and the community often produces third-party - images to use as well. 
- - These pre-built stop you from needing to manually install and manage the - software, which allows you to save valuable build time as well as disk space. +Docker provides over 170 pre-built +[official images](https://hub.docker.com/search?q=&image_filter=official) for +almost every common development scenario. For example, if you're building a Java +web server, use a dedicated image such as +[`openjdk`](https://hub.docker.com/_/openjdk/). Even when there's not an +official image for what you might want, Docker provides images from +[verified publishers](https://hub.docker.com/search?q=&image_filter=store) and +[open source partners](https://hub.docker.com/search?q=&image_filter=open_source) +that can help you on your way. The Docker community often produces third-party +images to use as well. -- Use multi-stage builds to run builds in parallel! +Using official images saves you time and ensures you stay up to date and secure +by default. - +#### Use multi-stage builds - Multi-stage builds let you split up your Dockerfile into multiple distinct - stages, and then provide the tools to combine them all back together again. - The docker builder will work out dependencies between the stages and run them - using the most efficient strategy, even allowing you to run multiple commands at the - same time in this way! - - To use a multi-stage build, you can simply use multiple `FROM` commands. For - example, suppose you want to build a simple web server that serves HTML from - your `docs` directory in Git: - - ```dockerfile - FROM alpine as git - RUN apk add git - - FROM git as fetch - WORKDIR /repo - RUN git clone https://github.com/your/repository.git . - - FROM nginx as site - COPY --from=fetch /repo/docs/ /usr/share/nginx/html - ``` - - This build has 3 stages - `git`, `fetch` and `site`. 
In this example, we've - used `git` as the base for the `fetch` stage, and also used `COPY`'s `--from` - flag to copy the data from the `docs/` directory into the NGINX server - directory. - - Each stage has only a few instructions, and when possible, docker will run - these stages in parallel. Additionally, only the final instructions in the - `site` stage will end up as layers in our image, so we won't have our entire - `git` history embedded into the final result, which helps keep our images - small and secure. + -- Combine your commands together wherever possible! +Multi-stage builds let you split up your Dockerfile into multiple distinct +stages. Each stage completes a step in the build process, and you can bridge the +different stages to create your final image at the end. The Docker builder will +work out dependencies between the stages and run them using the most efficient +strategy. This even allows you to run multiple builds concurrently. - Most commands in your Dockerfile support being joined together, so that they - can do multiple things all at once! For example, it's fairly common to see - `RUN` commands being used like this: +Multi-stage builds use two or more `FROM` commands. The following example +illustrates building a simple web server that serves HTML from your `docs` +directory in Git: - ```dockerfile - RUN echo "the first command" - RUN echo "the second command" - ``` +```dockerfile +# stage 1 +FROM alpine as git +RUN apk add git - But actually, we can run both of these commands inside a single `RUN`, which - means that they will share the same cache! We can do this by using the `&&` - shell operator to run one command after another: +# stage 2 +FROM git as fetch +WORKDIR /repo +RUN git clone https://github.com/your/repository.git . 
- ```dockerfile - RUN echo "the first command" && echo "the second command" - # or to split to multiple lines - RUN echo "the first command" && \ - echo "the second command" - ``` +# stage 3 +FROM nginx as site +COPY --from=fetch /repo/docs/ /usr/share/nginx/html +``` - We can also use [heredocs]() to simplify complex multiline scripts (note the - `set -e` command to exit immediately after any command fails, instead of - continuing): +This build has 3 stages: `git`, `fetch` and `site`. In this example, `git` is +the base for the `fetch` stage. The `site` stage uses the `COPY --from` flag to copy the data +from the `fetch` stage's `docs/` directory into the Nginx server directory. - ```dockerfile - RUN <FROM ubuntu:latest> ]; deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; - copy [ label = <COPY . /src/>, color = "red" ]; + copy [ label = <COPY main.c /src/>, color = "red" ]; workdir [ label = <WORKDIR /src/> ]; build [ label = <RUN make build> ]; diff --git a/build/images/cache-stack-invalidate-copy.svg b/build/images/cache-stack-invalidate-copy.svg index cd80c60f21..a0408995db 100644 --- a/build/images/cache-stack-invalidate-copy.svg +++ b/build/images/cache-stack-invalidate-copy.svg @@ -1,13 +1,13 @@ - - + from @@ -33,8 +33,8 @@ copy -COPY -. /src/ +COPY +main.c /src/ diff --git a/build/images/cache-stack-invalidate-rest.dot b/build/images/cache-stack-invalidate-rest.dot index f7956776e0..91c0ccc127 100644 --- a/build/images/cache-stack-invalidate-rest.dot +++ b/build/images/cache-stack-invalidate-rest.dot @@ -8,7 +8,7 @@ digraph { from [ label = <FROM ubuntu:latest> ]; deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; - copy [ label = <COPY . /src/>, color = "red" ]; + copy [ label = <COPY main.c /src/>, color = "red" ]; workdir [ label = <WORKDIR /src/>, color = "red" ]; build [ label = <RUN make build>, color = "red" ]; diff --git a/build/images/cache-stack-invalidate-rest.svg b/build/images/cache-stack-invalidate-rest.svg index fb85cf43d6..7d0830d51d 100644 --- a/build/images/cache-stack-invalidate-rest.svg +++ b/build/images/cache-stack-invalidate-rest.svg @@ -1,13 +1,13 @@ - - + from @@ -33,8 +33,8 @@ copy -COPY -. /src/ +COPY +main.c /src/ diff --git a/build/images/cache-stack.dot b/build/images/cache-stack.dot index 95c4573435..7df536f727 100644 --- a/build/images/cache-stack.dot +++ b/build/images/cache-stack.dot @@ -8,7 +8,7 @@ digraph { from [ label = <FROM ubuntu:latest> ]; deps [ label = <RUN apt-get update && \\
apt-get install -y build-essentials> ]; - copy [ label = <COPY . /src/> ]; + copy [ label = <COPY main.c /src/> ]; workdir [ label = <WORKDIR /src/> ]; build [ label = <RUN make build> ]; diff --git a/build/images/cache-stack.svg b/build/images/cache-stack.svg index 6afd17e4c9..73522805a7 100644 --- a/build/images/cache-stack.svg +++ b/build/images/cache-stack.svg @@ -1,13 +1,13 @@ - - + from @@ -33,8 +33,8 @@ copy -COPY -. /src/ +COPY +main.c /src/