diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..9697c6d5c --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +outputs/ +src/ +configs/webui/userconfig_streamlit.yaml
diff --git a/.env_docker.example b/.env_docker.example index 5a3494524..03c6cfeec 100644 --- a/.env_docker.example +++ b/.env_docker.example @@ -11,4 +11,4 @@ WEBUI_RELAUNCH=true #Pass cli arguments to webui.py e.g: #WEBUI_ARGS=--gpu=1 --esrgan-gpu=1 --gfpgan-gpu=1 -WEBUI_ARGS= +WEBUI_ARGS= \ No newline at end of file
diff --git a/.gitattributes b/.gitattributes index fa63bd967..c51fc4ac4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,4 @@ * text=auto *.{cmd,[cC][mM][dD]} text eol=crlf *.{bat,[bB][aA][tT]} text eol=crlf -*.sh text eol=lf \ No newline at end of file +*.sh text eol=lf
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 481557a37..56df8f364 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,5 @@ -ko_fi: hlky_ -github: [hlky, altryne] +github: [ZeroCool940711] +patreon: zerocool94 +ko_fi: zerocool94 +open_collective: sygil_dev +custom: ["https://paypal.me/zerocool94"]
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 5e155a076..686b48fb9 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -40,7 +40,7 @@ body: - type: dropdown id: os attributes: - label: Where are you running the webui? + label: Where are you running the webui? multiple: true options: - Windows @@ -52,7 +52,7 @@ attributes: label: Custom settings description: If you are running the webui with specific settings, please paste them here for reference (like --nitro) - render: shell + render: shell - type: textarea id: logs attributes: @@ -66,4 +66,4 @@ description: By submitting this issue, you agree to follow our [Code of Conduct](https://docs.github.com/en/site-policy/github-terms/github-community-code-of-conduct) options: - label: I agree to follow this project's Code of Conduct - required: true \ No newline at end of file + required: true
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index ccffec32d..000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,11 +0,0 @@ -blank_issues_enabled: false -contact_links: - - name: WebUI developer repository - url: https://github.com/hlky/stable-diffusion-webui/issues/new/choose - about: MOST BUGS SHOULD GO HERE Have a bug related to the features? Please open a bug on the developer repository. - - name: Feature Request, Question or Suggestion - url: https://github.com/hlky/stable-diffusion-webui/discussions - about: Please create a discussion and see if folks have already solved it - - name: Colab version specific bug? - url: https://github.com/altryne/sd-webui-colab/issues/new/choose - about: Please open colab related bugs here
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000..52c6e1931 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,16 @@ +# Description + +Please include: +* the relevant motivation +* a summary of the change +* which issue is fixed +* any additional dependencies required for this change
+ +Closes: # (issue) + +# Checklist: + +- [ ] I have changed the base branch to `dev` +- [ ] I have performed a self-review of my own code +- [ ] I have commented my code in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..eff26ce54 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + target-branch: "dev" + + schedule: + interval: "daily" diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 000000000..d8497adf7 --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,40 @@ +name: Deploy to GitHub Pages + +on: + push: + branches: + - master + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + deploy: + name: Deploy to GitHub Pages + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v3 + with: + node-version: 18 + cache: yarn + + - name: Install dependencies + run: yarn install + - name: Build website + run: yarn build + + # Popular action to deploy to GitHub Pages: + # Docs: https://github.com/peaceiris/actions-gh-pages#%EF%B8%8F-docusaurus + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + # Build output to publish to the `gh-pages` branch: + publish_dir: ./build + # The following lines assign commit authorship to the official + # GH-Actions bot for deploys to `gh-pages` branch: + # https://github.com/actions/checkout/issues/13#issuecomment-724415212 + # The GH actions bot is used by default if you didn't specify the two fields. + # You can swap them out with your own user credentials. 
+ user_name: github-actions[bot] + user_email: 41898282+github-actions[bot]@users.noreply.github.com
diff --git a/.github/workflows/test-deploy.yml b/.github/workflows/test-deploy.yml new file mode 100644 index 000000000..b0d042022 --- /dev/null +++ b/.github/workflows/test-deploy.yml @@ -0,0 +1,24 @@ +name: Test deployment + +on: + pull_request: + branches: + - master + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + test-deploy: + name: Test deployment + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-node@v3 + with: + node-version: 18 + cache: yarn + + - name: Install dependencies + run: yarn install + - name: Test build website + run: yarn build
diff --git a/.gitignore b/.gitignore index 62482b67a..397cb9536 100644 --- a/.gitignore +++ b/.gitignore @@ -28,7 +28,6 @@ __pycache__/ .Python build/ develop-eggs/ -dist/ downloads/ eggs/ .eggs/ @@ -47,13 +46,51 @@ MANIFEST .env_updated condaenv.*.requirements.txt +# Visual Studio directories +.vs/ +.vscode/ # =========================================================================== # # Repo-specific # =========================================================================== # +/configs/webui/userconfig_streamlit.yaml +/configs/webui/userconfig_flet.yaml /custom-conda-path.txt +!/src/components/* +!/src/pages/* /src/* -/outputs/* +/outputs +/model_cache /log/**/*.png +/log/webui/* /log/log.csv /flagged/* +/gfpgan/* +/models/* +/webui/flet/assets/uploads/ +/webui/flet/assets/outputs/ + +z_version_env.tmp scripts/bridgeData.py /user_data/* + +# Dependencies +/node_modules + +# Production +/build + +# Generated files +.docusaurus +.cache-loader + +# Misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local + +npm-debug.log* +yarn-debug.log* +yarn-error.log*
diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000 e69de29bb
diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 9c6244e56..000000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -# Default ignored files -/shelf/ -/workspace.xml -# Editor-based HTTP Client requests -/httpRequests/ -# Datasource local storage ignored files -/dataSources/ -/dataSources.local.xml - -*.pyc -.idea
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..ef57bc747 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +ci: + autofix_prs: true + autoupdate_branch: 'dev' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: "v0.0.278" + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + + - repo: https://github.com/psf/black + rev: 23.7.0 + hooks: + - id: black
diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 000000000..0b2a88c21 --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,48 @@ +[global] +disableWatchdogWarning = false +showWarningOnDirectExecution = true +dataFrameSerialization = "arrow" +
+[logger] +level = "info" +messageFormat = "%(asctime)s %(message)s"
+
+[client] +caching = true +displayEnabled = true +showErrorDetails = true
+
+[runner] +magicEnabled = true +installTracer = false +fixMatplotlib = true +postScriptGC = true +fastReruns = false
+
+[server] +folderWatchBlacklist = [] +fileWatcherType = "auto" +cookieSecret = "" +headless = false +runOnSave = false +port = 8501 +baseUrlPath = "" +enableCORS = true +enableXsrfProtection = true +maxUploadSize = 200 +maxMessageSize = 200 +enableWebsocketCompression = false
+
+[browser] +gatherUsageStats = false +serverPort = 8501
+
+[mapbox] +token = ""
+
+[deprecation] +showfileUploaderEncoding = true +showPyplotGlobalUse = true
+
+[theme] +base = "dark"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..d2087e57a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,24 @@ +# Contribution Guide
+
+All Pull Requests are opened against the `dev` branch, which is our main development branch.
+
+Two UI systems are currently supported:
+
+* **Gradio** — the entry point is `/scripts/webui.py`; you can start from there. Check out the [Gradio documentation](https://gradio.app/docs/) and their [Discord channel](https://discord.gg/Qs8AsnX7Jd) for more information about Gradio. +* **Streamlit** — the entry point is `/scripts/webui_streamlit.py`. Documentation on Streamlit is [located here](https://docs.streamlit.io/).
+
+### Development environment
+
+`environment.yaml` can be different from the one on `master`, so be sure to update it before making any changes to the code.
+
+The development environment is currently very similar to the one in production, so you can work on your contribution in the same conda env. Optionally you can create a separate environment.
+
+### Making changes
+
+If you're working on a fix, please post about it in the respective issue. If the issue doesn't exist, create it and then mention it in your Pull Request.
+
+If you're introducing new features, please make the corresponding additions to the documentation with an explanation of the new behavior. The documentation is located in `/docs/`. Depending on your contribution you may edit the existing files in there or create a new one.
+
+### Opening a Pull Request
+
+Prior to opening a request, make sure your Web UI works locally with your changes and that your branch is up-to-date with the main repository. Finally, open a new PR against the `dev` branch.
diff --git a/Dockerfile b/Dockerfile index 8d5ecb4e1..a0ae16fc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,37 @@ -FROM nvidia/cuda:11.3.1-runtime-ubuntu20.04 +ARG IMAGE=tukirito/sygil-webui:base -ENV DEBIAN_FRONTEND=noninteractive -WORKDIR /sd +# Use the base image +FROM ${IMAGE} +# Set the working directory +WORKDIR /workdir + +# Use the specified shell SHELL ["/bin/bash", "-c"] -RUN apt-get update && \ - apt-get install -y libglib2.0-0 wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +# Set environment variables ENV PYTHONPATH=/sd -# Install miniconda -ENV CONDA_DIR /opt/conda -RUN wget -O ~/miniconda.sh -q --show-progress --progress=bar:force https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ - /bin/bash ~/miniconda.sh -b -p $CONDA_DIR && \ - rm ~/miniconda.sh -ENV PATH=$CONDA_DIR/bin:$PATH +# Expose the required port +EXPOSE 8501 -# Install font for prompt matrix -COPY /data/DejaVuSans.ttf /usr/share/fonts/truetype/ +# Copy necessary files and directories +COPY ./entrypoint.sh /sd/ +COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/ +COPY ./data /sd/data +COPY ./images /sd/images +COPY ./scripts /sd/scripts +COPY ./ldm /sd/ldm +COPY ./frontend /sd/frontend +COPY ./configs /sd/configs +COPY ./configs/webui/webui_streamlit.yaml /sd/configs/webui/userconfig_streamlit.yaml +COPY ./.streamlit /sd/.streamlit +COPY ./optimizedSD /sd/optimizedSD -EXPOSE 7860 +# Set the entrypoint +ENTRYPOINT ["/sd/entrypoint.sh"] -COPY ./entrypoint.sh /sd/ -ENTRYPOINT /sd/entrypoint.sh +# Create .streamlit directory and set up credentials.toml +RUN mkdir -p ~/.streamlit \ && echo "[general]" > ~/.streamlit/credentials.toml \ && echo "email = \"\"" >> ~/.streamlit/credentials.toml
diff --git a/Dockerfile_base b/Dockerfile_base new file mode 100644 index 000000000..4b5deb2f9 --- /dev/null +++ b/Dockerfile_base @@ -0,0 +1,18 @@ +ARG PYTORCH_IMAGE=hlky/pytorch:1.12.1-runtime + +FROM ${PYTORCH_IMAGE} +SHELL ["/bin/bash", "-c"] + +WORKDIR /install + +RUN apt-get update && \ + apt-get install -y wget curl git build-essential zip unzip nano openssh-server libgl1 libsndfile1-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY ./requirements.txt /install/ +COPY ./setup.py /install/ + +RUN /opt/conda/bin/python -m pip install -r /install/requirements.txt + +RUN /opt/conda/bin/conda clean -ya
diff --git a/Dockerfile_runpod b/Dockerfile_runpod new file mode 100644 index 000000000..3f42eef0d --- /dev/null +++ b/Dockerfile_runpod @@ -0,0 +1,29 @@ +ARG IMAGE=tukirito/sygil-webui:base + +FROM ${IMAGE} + +WORKDIR /workdir + +SHELL ["/bin/bash", "-c"] + +ENV PYTHONPATH=/sd + +EXPOSE 8501 +COPY ./runpod_entrypoint.sh /sd/entrypoint.sh +COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/ +COPY ./configs/ /sd/configs/ +COPY ./configs/webui/webui_streamlit.yaml /sd/configs/webui/userconfig_streamlit.yaml +COPY ./data/ /sd/data/ +COPY ./frontend/ /sd/frontend/ +COPY ./gfpgan/ /sd/gfpgan/ +COPY ./images/ /sd/images/ +COPY ./ldm/ /sd/ldm/ +COPY ./models/ /sd/models/ +COPY ./optimizedSD/ /sd/optimizedSD/ +COPY ./scripts/ /sd/scripts/ +COPY ./.streamlit/ /sd/.streamlit/ +ENTRYPOINT /sd/entrypoint.sh + +RUN mkdir -p ~/.streamlit/ +RUN echo "[general]" > ~/.streamlit/credentials.toml +RUN echo "email = \"\"" >> ~/.streamlit/credentials.toml
diff --git a/README.md b/README.md index 6ba748a6e..f29df449e 100644 --- a/README.md +++ b/README.md @@ -1,244 +1,181 @@ -[![Open In
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/altryne/sd-webui-colab/blob/main/Stable_Diffusion_WebUi_Altryne.ipynb) +#
Web-based UI for Stable Diffusion
-# [Installation](https://github.com/hlky/stable-diffusion/wiki/Installation) +## Created by [Sygil.Dev](https://github.com/sygil-dev) -### Have an **issue**? +## Join us at Sygil.Dev's Discord Server [![Generic badge](https://flat.badgen.net/discord/members/ttM8Tm6wge?icon=discord)](https://discord.gg/ttM8Tm6wge) -* If the issue involves _a bug_ in **textual-inversion** create the issue on **_[hlky/stable-diffusion-webui](https://github.com/hlky/stable-diffusion-webui)_** -* If you want to know how to **activate** or **use** textual-inversion see **_[hlky/sd-enable-textual-inversion](https://github.com/hlky/sd-enable-textual-inversion)_**. Activation not working? create the issue on **_[hlky/stable-diffusion-webui](https://github.com/hlky/stable-diffusion-webui)_** +## Installation instructions for: +- **[Windows](https://sygil-dev.github.io/sygil-webui/docs/Installation/windows-installation)** +- **[Linux](https://sygil-dev.github.io/sygil-webui/docs/Installation/linux-installation)** -## More documentation about features, troubleshooting, common issues very soon -### Want to help with documentation? Documented something? Use [Discussions](https://github.com/hlky/stable-diffusion-webui/discussions) +### Want to ask a question or request a feature? -## **Important** +Come to our [Discord Server](https://discord.gg/gyXNe4NySY) or use [Discussions](https://github.com/sygil-dev/sygil-webui/discussions). -🔥 NEW! webui.cmd updates with any changes in environment.yaml file so the environment will always be up to date as long as you get the new environment.yaml file 🔥 +## Documentation -:fire: no need to remove environment, delete src folder and create again, MUCH simpler! 🔥 +[Documentation is located here](https://sygil-dev.github.io/sygil-webui/) +## Want to contribute? +Check the [Contribution Guide](CONTRIBUTING.md) +[Sygil-Dev](https://github.com/Sygil-Dev) main devs: -------------- +* ![ZeroCool940711's avatar](https://avatars.githubusercontent.com/u/5977640?s=40&v=4)[ZeroCool940711](https://github.com/ZeroCool940711) +* ![Kasiya13's avatar](https://avatars.githubusercontent.com/u/26075839?s=40&v=4)[Kasiya13](https://github.com/Kasiya13) -### Questions about **_[Upscalers](https://github.com/hlky/stable-diffusion-webui/wiki/Upscalers)_**? -### Questions about **_[Optimized mode](https://github.com/hlky/stable-diffusion-webui/wiki/Optimized-mode)_**? -### Questions about **_[Command line options](https://github.com/hlky/stable-diffusion-webui/wiki/Command-line-options)_**?
+### Project Features: -------------- +* Built-in image enhancers and upscalers, including GFPGAN and realESRGAN +* Generator Preview: See your image as it's being made -Features: +* Run additional upscaling models on CPU to save VRAM -* Gradio GUI: Idiot-proof, fully featured frontend for both txt2img and img2img generation -* No more manually typing parameters, now all you have to do is write your prompt and adjust sliders -* GFPGAN Face Correction 🔥: [Download the model](https://github.com/hlky/stable-diffusion-webui#gfpgan) Automatically correct distorted faces with a built-in GFPGAN option, fixes them in less than half a second -* RealESRGAN Upscaling 🔥: [Download the models](https://github.com/hlky/stable-diffusion-webui#realesrgan) Boosts the resolution of images with a built-in RealESRGAN option -* :computer: esrgan/gfpgan on cpu support :computer: -* Textual inversion 🔥: [info](https://textual-inversion.github.io/) - requires enabling, see [here](https://github.com/hlky/sd-enable-textual-inversion), script works as usual without it enabled -* Advanced img2img editor :art: :fire: :art: -* :fire::fire: Mask and crop :fire::fire: -* Mask painting (NEW) 🖌️: Powerful tool for re-generating only specific parts of an image you want to change -* More k_diffusion samplers 🔥🔥 : Far greater quality outputs than the default sampler, less distortion and more accurate -* txt2img samplers: "DDIM", "PLMS", 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms' -* img2img samplers: "DDIM", 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms' -* Loopback (NEW) ➿: Automatically feed the last generated sample back into img2img -* Prompt Weighting (NEW) 🏋️: Adjust the strength of different terms in your prompt -* :fire: gpu device selectable with --gpu :fire: -* Memory Monitoring 🔥: Shows VRAM usage and generation time after outputting. -* Word Seeds 🔥: Use words instead of seed numbers -* CFG: Classifier free guidance scale, a feature for fine-tuning your output -* Launcher Automatic 👑🔥 shortcut to load the model, no more typing in Conda -* Lighter on VRAM: 512x512 img2img & txt2img tested working on 6GB -* and ???? +* Textual inversion: [Research Paper](https://textual-inversion.github.io/) -# Stable Diffusion web UI -A browser interface based on Gradio library for Stable Diffusion. +* K-Diffusion Samplers: A great collection of samplers to use, including: -Original script with Gradio UI was written by a kind anonymous user. This is a modification. + - `k_euler` + - `k_lms` + - `k_euler_a` + - `k_dpm_2` + - `k_dpm_2_a` + - `k_heun` + - `PLMS` + - `DDIM` -![](https://github.com/hlky/stable-diffusion-webui/blob/master/images/txt2img.jpg) -![](https://github.com/hlky/stable-diffusion-webui/blob/master/images/img2img.jpg) -![](https://github.com/hlky/stable-diffusion-webui/blob/master/images/gfpgan.jpg) -![](https://github.com/hlky/stable-diffusion-webui/blob/master/images/esrgan.jpg) +* Loopback: Automatically feed the last generated sample back into img2img -### GFPGAN +* Prompt Weighting & Negative Prompts: Gain more control over your creations -If you want to use GFPGAN to improve generated faces, you need to install it separately. -Download [GFPGANv1.3.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth) and put it -into the `/stable-diffusion/src/gfpgan/experiments/pretrained_models` directory.
+* Selectable GPU usage from Settings tab -### RealESRGAN -Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth). -Put them into the `stable-diffusion/src/realesrgan/experiments/pretrained_models` directory. +* Word Seeds: Use words instead of seed numbers -### Web UI +* Automated Launcher: Activate conda and run Stable Diffusion with a single command -When launching, you may get a very long warning message related to some weights not being used. You may freely ignore it. -After a while, you will get a message like this: +* Lighter on VRAM: 512x512 Text2Image & Image2Image tested working on 4GB (with *optimized* mode enabled in Settings) -``` -Running on local URL: http://127.0.0.1:7860/ -``` +* Prompt validation: If your prompt is too long, you will get a warning in the text output field -Open the URL in browser, and you are good to go. +* Sequential seeds for batches: If you use a seed of 1000 to generate two batches of two images each, four generated images will have seeds: `1000, 1001, 1002, 1003`. -## Features -The script creates a web UI for Stable Diffusion's txt2img and img2img scripts. Following are features added -that are not in original script. +* Prompt matrix: Separate multiple prompts using the `|` character, and the system will produce an image for every combination of them (see the sketch after this list). -### GFPGAN -Lets you improve faces in pictures using the GFPGAN model. There is a checkbox in every tab to use GFPGAN at 100%, and -also a separate tab that just allows you to use GFPGAN on any picture, with a slider that controls how strong the effect is. +* [Gradio] Advanced img2img editor with Mask and crop capabilities -![](images/GFPGAN.png) +* [Gradio] Mask painting 🖌️: Powerful tool for re-generating only specific parts of an image you want to change (currently Gradio only) -### RealESRGAN -Lets you double the resolution of generated images. There is a checkbox in every tab to use RealESRGAN, and you can choose between the regular upscaler and the anime version. -There is also a separate tab for using RealESRGAN on any picture. +# SD WebUI -![](images/RealESRGAN.png) +An easy way to work with Stable Diffusion right from your browser. + +## Streamlit + +![](images/streamlit/streamlit-t2i.png) + +**Features:** + +- Clean UI with an easy-to-use design, with support for widescreen displays +- *Dynamic live preview* of your generations +- Easily customizable defaults, right from the WebUI's Settings tab +- An integrated gallery to show the generations for a prompt +- *Optimized VRAM* usage for bigger generations or usage on lower end GPUs +- *Text to Video:* Generate video clips from text prompts right from the WebUI (WIP) +- Image to Text: Use [CLIP Interrogator](https://github.com/pharmapsychotic/clip-interrogator) to interrogate an image and get a prompt that you can use to generate a similar image using Stable Diffusion. +- *Concepts Library:* Run custom embeddings others have made via textual inversion. +- Textual Inversion training: Train your own embeddings on any photo you want and use it on your prompt. +- **Currently in development:** [Stable Horde](https://stablehorde.net/) integration; ImgLab, batch inputs, & mask editor from Gradio.
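(Editorial aside: the prompt-matrix behavior listed above is easy to illustrate in a few lines of Python. This is a hedged sketch of the combination logic only — the function name and structure are assumptions, not the webui's actual code.)

```python
from itertools import combinations

def prompt_matrix(prompt: str) -> list[str]:
    """Expand 'base|opt1|opt2' into every combination; the base part is always kept."""
    base, *options = [part.strip() for part in prompt.split("|")]
    return [", ".join([base, *combo])
            for n in range(len(options) + 1)
            for combo in combinations(options, n)]

# Two optional parts -> 2**2 = 4 prompts, all sharing the base:
for p in prompt_matrix("a busy city street in a modern city|illustration|cinematic lighting"):
    print(p)
```

With n optional parts this produces 2**n prompts, which is why batch count is ignored when the feature is used: the number of images is determined by the prompt itself.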
+ +**Prompt Weights & Negative Prompts:** -### Sampling method selection -txt2img samplers: "DDIM", "PLMS", 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms' -img2img samplers: "DDIM", 'k_dpm_2_a', 'k_dpm_2', 'k_euler_a', 'k_euler', 'k_heun', 'k_lms' +To give a token (tag recognized by the AI) a specific or increased weight (emphasis), add `:0.##` to the prompt, where `0.##` is a decimal that will specify the weight of all tokens before the colon. +Ex: `cat:0.30, dog:0.70` or `guy riding a bicycle :0.7, incoming car :0.30` -![](images/sampling.png) +Negative prompts can be added by using `###`, after which any tokens will be seen as negative. +Ex: `cat playing with string ### yarn` will negate `yarn` from the generated image. -### Prompt matrix -Separate multiple prompts using the `|` character, and the system will produce an image for every combination of them. -For example, if you use `a busy city street in a modern city|illustration|cinematic lighting` prompt, there are four combinations possible (first part of prompt is always kept): +Negatives are a very powerful tool to get rid of contextually similar or related topics, but **be careful when adding them since the AI might see connections you can't**, and end up outputting gibberish. -- `a busy city street in a modern city` -- `a busy city street in a modern city, illustration` -- `a busy city street in a modern city, cinematic lighting` -- `a busy city street in a modern city, illustration, cinematic lighting` +**Tip:** Try using the same seed with different prompt configurations or weight values to see how the AI understands them; it can lead to prompts that are more well-tuned and less prone to error. (A minimal parsing sketch follows the Gradio section below.) -Four images will be produced, in this order, all with same seed and each with corresponding prompt: -![](images/prompt-matrix.png) +Please see the [Streamlit Documentation](docs/4.streamlit-interface.md) to learn more. -Another example, this time with 5 prompts and 16 variations: -![](images/prompt_matrix.jpg) +## Gradio [Legacy] -If you use this feature, batch count will be ignored, because the number of pictures to produce -depends on your prompts, but batch size will still work (generating multiple pictures at the -same time for a small speed boost). +![](images/gradio/gradio-t2i.png) -### Flagging (Broken after UI changed to gradio.Blocks() see [Flag button missing from new UI](https://github.com/hlky/stable-diffusion-webui/issues/50)) -Click the Flag button under the output section, and generated images will be saved to `log/images` directory, and generation parameters -will be appended to a csv file `log/log.csv` in the `/sd` directory. +**Features:** -> but every image is saved, why would I need this? +- Older UI that is functional and feature complete. +- Has access to all upscaling models, including LSDR. +- Dynamic prompt entry automatically changes your generation settings based on `--params` in a prompt. +- Includes quick and easy ways to send generations to Image2Image or the Image Lab for upscaling. -If you're like me, you experiment a lot with prompts and settings, and only a few images are worth saving. You can -just save them using right click in browser, but then you won't be able to reproduce them later because you will not -know what exact prompt created the image. -If you use the flag button, generation parameters will be written to csv file, -and you can easily find parameters for an image by searching for its filename. +**Note: the Gradio interface is no longer being actively developed by Sygil.Dev and is only receiving bug fixes.** -### Copy-paste generation parameters -A text output provides generation parameters in an easy to copy-paste form for easy sharing. +Please see the [Gradio Documentation](https://sygil-dev.github.io/sygil-webui/docs/Gradio/gradio-interface/) to learn more.
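(Editorial aside: a minimal sketch of the `token:0.##` weight syntax and the `###` negative separator described in the Prompt Weights section above. It treats each comma-separated chunk independently, which is a simplifying assumption — this is illustrative, not the project's actual parser.)

```python
def parse_prompt(prompt: str):
    """Return ([(text, weight), ...], negative_prompt) for the syntax above."""
    positive, _, negative = prompt.partition("###")
    weighted = []
    for chunk in positive.split(","):
        text, sep, weight = chunk.strip().rpartition(":")
        if sep and weight.strip().replace(".", "", 1).isdigit():
            weighted.append((text.strip(), float(weight)))
        else:
            # no explicit weight -> default weight of 1.0
            weighted.append((chunk.strip(), 1.0))
    return weighted, negative.strip()

print(parse_prompt("guy riding a bicycle :0.7, incoming car :0.30"))
# ([('guy riding a bicycle', 0.7), ('incoming car', 0.3)], '')
print(parse_prompt("cat playing with string ### yarn"))
# ([('cat playing with string', 1.0)], 'yarn')
```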
-![](images/kopipe.png) +## Image Upscalers -If you generate multiple pictures, the displayed seed will be the seed of the first one. +--- -### Correct seeds for batches -If you use a seed of 1000 to generate two batches of two images each, four generated images will have seeds: `1000, 1001, 1002, 1003`. -Previous versions of the UI would produce `1000, x, 1001, x`, where x is an image that can't be generated by any seed. +### GFPGAN -### Resizing -There are three options for resizing input images in img2img mode: +![](images/GFPGAN.png) -- Just resize - simply resizes source image to target resolution, resulting in incorrect aspect ratio -- Crop and resize - resize source image preserving aspect ratio so that entirety of target resolution is occupied by it, and crop parts that stick out -- Resize and fill - resize source image preserving aspect ratio so that it entirely fits target resolution, and fill empty space by rows/columns from source image +Lets you improve faces in pictures using the GFPGAN model. There is a checkbox in every tab to use GFPGAN at 100%, and also a separate tab that just allows you to use GFPGAN on any picture, with a slider that controls how strong the effect is. -Example: -![](images/resizing.jpg) +If you want to use GFPGAN to improve generated faces, you need to install it separately. +Download [GFPGANv1.4.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth) and put it +into the `/sygil-webui/models/gfpgan` directory. -### Loading -Gradio's loading graphic has a very negative effect on the processing speed of the neural network. -My RTX 3090 makes images about 10% faster when the tab with gradio is not active. By default, the UI -now hides loading progress animation and replaces it with static "Loading..." text, which achieves -the same effect. Use the --no-progressbar-hiding commandline option to revert this and show loading animations. +### RealESRGAN -### Prompt validation -Stable Diffusion has a limit for input text length. If your prompt is too long, you will get a -warning in the text output field, showing which parts of your text were truncated and ignored by the model. +![](images/RealESRGAN.png) -### Loopback -A checkbox for img2img allowing to automatically feed output image as input for the next batch. Equivalent to -saving output image, and replacing input image with it. Batch count setting controls how many iterations of -this you get. +Lets you double the resolution of generated images. There is a checkbox in every tab to use RealESRGAN, and you can choose between the regular upscaler and the anime version. +There is also a separate tab for using RealESRGAN on any picture. -Usually, when doing this, you would choose one of many images for the next iteration yourself, so the usefulness -of this feature may be questionable, but I've managed to get some very nice outputs with it that I wasn't able -to get otherwise. +Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth). +Put them into the `sygil-webui/models/realesrgan` directory.
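(Editorial aside: the three img2img resize modes described in the removed README text above — just resize, crop and resize, resize and fill — roughly correspond to the Pillow sketch below. It pads with black rather than repeating source rows/columns, so it is an approximation under stated assumptions, not the webui's implementation.)

```python
from PIL import Image, ImageOps

def resize_image(img: Image.Image, width: int, height: int, mode: str) -> Image.Image:
    if mode == "just resize":        # stretch to target; aspect ratio may distort
        return img.resize((width, height), Image.LANCZOS)
    if mode == "crop and resize":    # cover the target, cropping the overflow
        return ImageOps.fit(img, (width, height), Image.LANCZOS)
    if mode == "resize and fill":    # fit inside the target, pad the rest
        fitted = img.copy()
        fitted.thumbnail((width, height), Image.LANCZOS)
        canvas = Image.new("RGB", (width, height))
        canvas.paste(fitted, ((width - fitted.width) // 2,
                              (height - fitted.height) // 2))
        return canvas
    raise ValueError(f"unknown mode: {mode}")
```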
-Example: (cherrypicked result; original picture by anon) ### LDSR -![](images/loopback.jpg) +Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.ckpt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename `last.ckpt` to `model.ckpt` and place both under `sygil-webui/models/ldsr/` +### GoBig, and GoLatent *(Currently on the Gradio version Only)* -### --help -``` -optional arguments: - -h, --help show this help message and exit - --outdir [OUTDIR] dir to write results to - --outdir_txt2img [OUTDIR_TXT2IMG] - dir to write txt2img results to (overrides --outdir) - --outdir_img2img [OUTDIR_IMG2IMG] - dir to write img2img results to (overrides --outdir) - --save-metadata Whether to embed the generation parameters in the sample images - --skip-grid do not save a grid, only individual samples. Helpful when evaluating lots of samples - --skip-save do not save individual samples. For speed measurements. - --n_rows N_ROWS rows in the grid; use -1 for autodetect and 0 for n_rows to be same as batch_size (default: - -1) - --config CONFIG path to config which constructs model - --ckpt CKPT path to checkpoint of model - --precision {full,autocast} - evaluate at this precision - --gfpgan-dir GFPGAN_DIR - GFPGAN directory - --realesrgan-dir REALESRGAN_DIR - RealESRGAN directory - --realesrgan-model REALESRGAN_MODEL - Upscaling model for RealESRGAN - --no-verify-input do not verify input to check if it's too long - --no-half do not switch the model to 16-bit floats - --no-progressbar-hiding - do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware - acceleration in browser) - --defaults DEFAULTS path to configuration file providing UI defaults, uses same format as cli parameter - --gpu GPU choose which GPU to use if you have multiple - --extra-models-cpu run extra models (GFPGAN/ESRGAN) on cpu - --esrgan-cpu run ESRGAN on cpu - --gfpgan-cpu run GFPGAN on cpu - --cli CLI don't launch web server, take Python function kwargs from this file. -``` +More powerful upscalers that use a separate Latent Diffusion model to more cleanly upscale images. + +Please see the [Post-Processing Documentation](https://sygil-dev.github.io/sygil-webui/docs/post-processing) to learn more. ----- +### *Original Information From The Stable Diffusion Repo:* + # Stable Diffusion + *Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:* -[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)
+[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/) [Robin Rombach](https://github.com/rromb)\*, [Andreas Blattmann](https://github.com/ablattmann)\*, [Dominik Lorenz](https://github.com/qp-qp)\, [Patrick Esser](https://github.com/pesser), -[Bjรถrn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)
+[Bjรถrn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer) **CVPR '22 Oral** which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our [Project page](https://ommer-lab.com/research/latent-diffusion-models/). -![txt2img-stable2](assets/stable-samples/txt2img/merged-0006.png) [Stable Diffusion](#stable-diffusion-v1) is a latent text-to-image diffusion model. -Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database. -Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487), +Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database. +Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487), this model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts. With its 860M UNet and 123M text encoder, the model is relatively lightweight and runs on a GPU with at least 10GB VRAM. See [this section](#stable-diffusion-v1) below and the [model card](https://huggingface.co/CompVis/stable-diffusion). @@ -247,34 +184,30 @@ See [this section](#stable-diffusion-v1) below and the [model card](https://hugg Stable Diffusion v1 refers to a specific configuration of the model architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet -and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and +and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and then finetuned on 512x512 images. *Note: Stable Diffusion v1 is a general text-to-image diffusion model and therefore mirrors biases and (mis-)conceptions that are present -in its training data. +in its training data. Details on the training procedure and data, as well as the intended use of the model can be found in the corresponding [model card](https://huggingface.co/CompVis/stable-diffusion). -## Comments +## Comments -- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion) -and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch). -Thanks for open-sourcing! - -- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories). +- Our code base for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion) + and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch). + Thanks for open-sourcing! +- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories). 
## BibTeX ``` @misc{rombach2021highresolution, - title={High-Resolution Image Synthesis with Latent Diffusion Models}, + title={High-Resolution Image Synthesis with Latent Diffusion Models}, author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer}, year={2021}, eprint={2112.10752}, archivePrefix={arXiv}, primaryClass={cs.CV} } - ``` - - diff --git a/Stable_Diffusion_v1_Model_Card.md b/Stable_Diffusion_v1_Model_Card.md index 2cbf99bd2..ec37dc9e1 100644 --- a/Stable_Diffusion_v1_Model_Card.md +++ b/Stable_Diffusion_v1_Model_Card.md @@ -21,7 +21,7 @@ This model card focuses on the model associated with the Stable Diffusion model, # Uses -## Direct Use +## Direct Use The model is intended for research purposes only. Possible research areas and tasks include @@ -68,11 +68,11 @@ Using the model to generate content that is cruel to individuals is a misuse of considerations. ### Bias -While the capabilities of image generation models are impressive, they can also reinforce or exacerbate social biases. -Stable Diffusion v1 was trained on subsets of [LAION-2B(en)](https://laion.ai/blog/laion-5b/), -which consists of images that are primarily limited to English descriptions. -Texts and images from communities and cultures that use other languages are likely to be insufficiently accounted for. -This affects the overall output of the model, as white and western cultures are often set as the default. Further, the +While the capabilities of image generation models are impressive, they can also reinforce or exacerbate social biases. +Stable Diffusion v1 was trained on subsets of [LAION-2B(en)](https://laion.ai/blog/laion-5b/), +which consists of images that are primarily limited to English descriptions. +Texts and images from communities and cultures that use other languages are likely to be insufficiently accounted for. +This affects the overall output of the model, as white and western cultures are often set as the default. Further, the ability of the model to generate content with non-English prompts is significantly worse than with English-language prompts. @@ -84,7 +84,7 @@ The model developers used the following dataset for training the model: - LAION-2B (en) and subsets thereof (see next section) **Training Procedure** -Stable Diffusion v1 is a latent diffusion model which combines an autoencoder with a diffusion model that is trained in the latent space of the autoencoder. During training, +Stable Diffusion v1 is a latent diffusion model which combines an autoencoder with a diffusion model that is trained in the latent space of the autoencoder. During training, - Images are encoded through an encoder, which turns images into latent representations. The autoencoder uses a relative downsampling factor of 8 and maps images of shape H x W x 3 to latents of shape H/f x W/f x 4 - Text prompts are encoded through a ViT-L/14 text-encoder.
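(Editorial aside: the latent geometry stated in the hunk above is easy to sanity-check. With downsampling factor f = 8, a 512x512x3 image maps to a 64x64x4 latent — about 48x fewer values. The snippet below only restates the shapes given in the model card.)

```python
# Latent geometry from the model card: H x W x 3 -> H/f x W/f x 4, with f = 8
f = 8
H, W = 512, 512
latent_shape = (H // f, W // f, 4)                    # (64, 64, 4)
compression = (H * W * 3) / ((H // f) * (W // f) * 4)
print(latent_shape, compression)                      # (64, 64, 4) 48.0
```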
@@ -108,12 +108,12 @@ filtered to images with an original size `>= 512x512`, estimated aesthetics scor - **Batch:** 32 x 8 x 2 x 4 = 2048 - **Learning rate:** warmup to 0.0001 for 10,000 steps and then kept constant -## Evaluation Results +## Evaluation Results Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) and 50 PLMS sampling steps show the relative improvements of the checkpoints: -![pareto](assets/v1-variants-scores.jpg) +![pareto](assets/v1-variants-scores.jpg) Evaluated using 50 PLMS steps and 10000 random prompts from the COCO2017 validation set, evaluated at 512x512 resolution. Not optimized for FID scores. ## Environmental Impact @@ -137,4 +137,3 @@ Based on that information, we estimate the following CO2 emissions using the [Ma } *This model card was written by: Robin Rombach and Patrick Esser and is based on the [DALL-E Mini model card](https://huggingface.co/dalle-mini/dalle-mini).* - diff --git a/Web_based_UI_for_Stable_Diffusion_colab.ipynb b/Web_based_UI_for_Stable_Diffusion_colab.ipynb new file mode 100644 index 000000000..c9125872c --- /dev/null +++ b/Web_based_UI_for_Stable_Diffusion_colab.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "S5RoIM-5IPZJ" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Sygil-Dev/sygil-webui/blob/main/Web_based_UI_for_Stable_Diffusion_colab.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5-Bx4AsEoPU-" + }, + "source": [ + "# README" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z4kQYMPQn4d-" + }, + "source": [ + "###
Web-based UI for Stable Diffusion
\n", + "\n", + "## Created by [Sygil-Dev](https://github.com/Sygil-Dev)\n", + "\n", + "## [Visit Sygil-Dev's Discord Server](https://discord.gg/gyXNe4NySY) [![Discord Server](https://user-images.githubusercontent.com/5977640/190528254-9b5b4423-47ee-4f24-b4f9-fd13fba37518.png)](https://discord.gg/gyXNe4NySY)\n", + "\n", + "## Installation instructions for:\n", + "\n", + "- **[Windows](https://sygil-dev.github.io/sygil-webui/docs/1.windows-installation.html)**\n", + "- **[Linux](https://sygil-dev.github.io/sygil-webui/docs/2.linux-installation.html)**\n", + "\n", + "### Want to ask a question or request a feature?\n", + "\n", + "Come to our [Discord Server](https://discord.gg/gyXNe4NySY) or use [Discussions](https://github.com/Sygil-Dev/sygil-webui/discussions).\n", + "\n", + "## Documentation\n", + "\n", + "[Documentation is located here](https://sygil-dev.github.io/sygil-webui/)\n", + "\n", + "## Want to contribute?\n", + "\n", + "Check the [Contribution Guide](CONTRIBUTING.md)\n", + "\n", + "[Sygil-Dev](https://github.com/Sygil-Dev) main devs:\n", + "\n", + "* ![hlky's avatar](https://avatars.githubusercontent.com/u/106811348?s=40&v=4) [hlky](https://github.com/hlky)\n", + "* ![ZeroCool940711's avatar](https://avatars.githubusercontent.com/u/5977640?s=40&v=4)[ZeroCool940711](https://github.com/ZeroCool940711)\n", + "* ![codedealer's avatar](https://avatars.githubusercontent.com/u/4258136?s=40&v=4)[codedealer](https://github.com/codedealer)\n", + "\n", + "### Project Features:\n", + "\n", + "* Two great Web UIs to choose from: Streamlit or Gradio\n", + "\n", + "* No more manually typing parameters, now all you have to do is write your prompt and adjust sliders\n", + "\n", + "* Built-in image enhancers and upscalers, including GFPGAN and realESRGAN\n", + "\n", + "* Run additional upscaling models on CPU to save VRAM\n", + "\n", + "* Textual inversion 🔥: [info](https://textual-inversion.github.io/) - requires enabling, see [here](https://github.com/hlky/sd-enable-textual-inversion), script works as usual without it enabled\n", + "\n", + "* Advanced img2img editor with Mask and crop capabilities\n", + "\n", + "* Mask painting 🖌️: Powerful tool for re-generating only specific parts of an image you want to change (currently Gradio only)\n", + "\n", + "* More diffusion samplers 🔥🔥: A great collection of samplers to use, including:\n", + " \n", + " - `k_euler` (Default)\n", + " - `k_lms`\n", + " - `k_euler_a`\n", + " - `k_dpm_2`\n", + " - `k_dpm_2_a`\n", + " - `k_heun`\n", + " - `PLMS`\n", + " - `DDIM`\n", + "\n", + "* Loopback ➿: Automatically feed the last generated sample back into img2img\n", + "\n", + "* Prompt Weighting 🏋️: Adjust the strength of different terms in your prompt\n", + "\n", + "* Selectable GPU usage with `--gpu`\n", + "\n", + "* Memory Monitoring 🔥: Shows VRAM usage and generation time after outputting\n", + "\n", + "* Word Seeds 🔥: Use words instead of seed numbers\n", + "\n", + "* CFG: Classifier free guidance scale, a feature for fine-tuning your output\n", + "\n", + "* Automatic Launcher: Activate conda and run Stable Diffusion with a single command\n", + "\n", + "* Lighter on VRAM: 512x512 Text2Image & Image2Image tested working on 4GB\n", + "\n", + "* Prompt validation: If your prompt is too long, you will get a warning in the text output field\n", + "\n", + "* Copy-paste generation parameters: A text output provides generation parameters in an easy to copy-paste form for easy sharing.\n", + "\n", + "* Correct seeds for batches: If you use a seed of 1000 to generate two batches of two images each, four generated images will have seeds: `1000, 1001, 1002, 1003` (see the sketch after this list).\n", + "\n", + "* Prompt matrix: Separate multiple prompts using the `|` character, and the system will produce an image for every combination of them.\n", + "\n", + "* Loopback for Image2Image: A checkbox for img2img allowing to automatically feed output image as input for the next batch. Equivalent to saving output image, and replacing input image with it.\n",
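(Editorial aside: the "Correct seeds for batches" rule above is plain arithmetic — seeds are assigned sequentially across every image of a run. A minimal sketch, with hypothetical variable names:)

```python
seed, n_iter, batch_size = 1000, 2, 2        # two batches of two images
seeds = [seed + i for i in range(n_iter * batch_size)]
print(seeds)                                 # [1000, 1001, 1002, 1003]
```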
+ "\n", + "# Stable Diffusion Web UI\n", + "\n", + "A fully-integrated and easy way to work with Stable Diffusion right from a browser window.\n", + "\n", + "## Streamlit\n", + "\n", + "![](https://github.com/aedhcarrick/sygil-webui/blob/patch-2/images/streamlit/streamlit-t2i.png?raw=1)\n", + "\n", + "**Features:**\n", + "\n", + "- Clean UI with an easy-to-use design, with support for widescreen displays.\n", + "- Dynamic live preview of your generations\n", + "- Easily customizable presets right from the WebUI (Coming Soon!)\n", + "- An integrated gallery to show the generations for a prompt or session (Coming soon!)\n", + "- Better VRAM usage optimization, fewer errors for bigger generations.\n", + "- Text2Video - Generate video clips from text prompts right from the Web UI (WIP)\n", + "- Concepts Library - Run custom embeddings others have made via textual inversion.\n", + "- Actively being developed with new features being added and planned - Stay Tuned!\n", + "- Streamlit is now the new primary UI for the project moving forward.\n", + "- *Currently in active development and still missing some of the features present in the Gradio Interface.*\n", + "\n", + "Please see the [Streamlit Documentation](docs/4.streamlit-interface.md) to learn more.\n", + "\n", + "## Gradio\n", + "\n", + "![](https://github.com/aedhcarrick/sygil-webui/blob/patch-2/images/gradio/gradio-t2i.png?raw=1)\n", + "\n", + "**Features:**\n", + "\n", + "- Older UI design that is fully functional and feature complete.\n", + "- Has access to all upscaling models, including LSDR.\n", + "- Dynamic prompt entry automatically changes your generation settings based on `--params` in a prompt.\n", + "- Includes quick and easy ways to send generations to Image2Image or the Image Lab for upscaling.\n", + "- *Note, the Gradio interface is no longer being actively developed and is only receiving bug fixes.*\n", + "\n", + "Please see the [Gradio Documentation](docs/5.gradio-interface.md) to learn more.\n", + "\n", + "## Image Upscalers\n", + "\n", + "---\n", + "\n", + "### GFPGAN\n", + "\n", + "![](https://github.com/aedhcarrick/sygil-webui/blob/patch-2/images/GFPGAN.png?raw=1)\n", + "\n", + "Lets you improve faces in pictures using the GFPGAN model. There is a checkbox in every tab to use GFPGAN at 100%, and also a separate tab that just allows you to use GFPGAN on any picture, with a slider that controls how strong the effect is.\n", + "\n", + "If you want to use GFPGAN to improve generated faces, you need to install it separately.\n", + "Download [GFPGANv1.4.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth) and put it\n", + "into the `/sygil-webui/models/gfpgan` directory.\n", + "\n", + "### RealESRGAN\n", + "\n", + "![](https://github.com/aedhcarrick/sygil-webui/blob/patch-2/images/RealESRGAN.png?raw=1)\n", + "\n", + "Lets you double the resolution of generated images.
There is a checkbox in every tab to use RealESRGAN, and you can choose between the regular upscaler and the anime version.\n", + "There is also a separate tab for using RealESRGAN on any picture.\n", + "\n", + "Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth).\n", + "Put them into the `sygil-webui/models/realesrgan` directory.\n", + "\n", + "\n", + "\n", + "### LDSR\n", + "\n", + "Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.ckpt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename last.ckpt to model.ckpt and place both under `sygil-webui/models/ldsr/`\n", + "\n", + "### GoBig, and GoLatent *(Currently on the Gradio version Only)*\n", + "\n", + "More powerful upscalers that use a separate Latent Diffusion model to more cleanly upscale images.\n", + "\n", + "\n", + "\n", + "Please see the [Image Enhancers Documentation](docs/6.image_enhancers.md) to learn more.\n", + "\n", + "-----\n", + "\n", + "### *Original Information From The Stable Diffusion Repo*\n", + "\n", + "# Stable Diffusion\n", + "\n", + "*Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*\n", + "\n", + "[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)
\n", + "[Robin Rombach](https://github.com/rromb)\\*,\n", + "[Andreas Blattmann](https://github.com/ablattmann)\\*,\n", + "[Dominik Lorenz](https://github.com/qp-qp)\\,\n", + "[Patrick Esser](https://github.com/pesser),\n", + "[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)
\n", + "\n", + "**CVPR '22 Oral**\n", + "\n", + "which is available on [GitHub](https://github.com/CompVis/latent-diffusion). PDF at [arXiv](https://arxiv.org/abs/2112.10752). Please also visit our [Project page](https://ommer-lab.com/research/latent-diffusion-models/).\n", + "\n", + "[Stable Diffusion](#stable-diffusion-v1) is a latent text-to-image diffusion\n", + "model.\n", + "Thanks to a generous compute donation from [Stability AI](https://stability.ai/) and support from [LAION](https://laion.ai/), we were able to train a Latent Diffusion Model on 512x512 images from a subset of the [LAION-5B](https://laion.ai/blog/laion-5b/) database.\n", + "Similar to Google's [Imagen](https://arxiv.org/abs/2205.11487),\n", + "this model uses a frozen CLIP ViT-L/14 text encoder to condition the model on text prompts.\n", + "With its 860M UNet and 123M text encoder, the model is relatively lightweight and runs on a GPU with at least 10GB VRAM.\n", + "See [this section](#stable-diffusion-v1) below and the [model card](https://huggingface.co/CompVis/stable-diffusion).\n", + "\n", + "## Stable Diffusion v1\n", + "\n", + "Stable Diffusion v1 refers to a specific configuration of the model\n", + "architecture that uses a downsampling-factor 8 autoencoder with an 860M UNet\n", + "and CLIP ViT-L/14 text encoder for the diffusion model. The model was pretrained on 256x256 images and\n", + "then finetuned on 512x512 images.\n", + "\n", + "*Note: Stable Diffusion v1 is a general text-to-image diffusion model and therefore mirrors biases and (mis-)conceptions that are present\n", + "in its training data.\n", + "Details on the training procedure and data, as well as the intended use of the model can be found in the corresponding [model card](https://huggingface.co/CompVis/stable-diffusion).\n", + "\n", + "## Comments\n", + "\n", + "- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)\n", + " and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).\n", + " Thanks for open-sourcing!\n", + "\n", + "- The implementation of the transformer encoder is from [x-transformers](https://github.com/lucidrains/x-transformers) by [lucidrains](https://github.com/lucidrains?tab=repositories).\n", + "\n", + "## BibTeX\n", + "\n", + "```\n", + "@misc{rombach2021highresolution,\n", + " title={High-Resolution Image Synthesis with Latent Diffusion Models},\n", + " author={Robin Rombach and Andreas Blattmann and Dominik Lorenz and Patrick Esser and Björn Ommer},\n", + " year={2021},\n", + " eprint={2112.10752},\n", + " archivePrefix={arXiv},\n", + " primaryClass={cs.CV}\n", + "}\n", + "\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iegma7yteERV" + }, + "source": [ + "# Config options for Colab instance\n", + "> Before running, make sure GPU backend is enabled.
(Unless you plan on generating with Stable Horde)\n", + ">> Runtime -> Change runtime type -> Hardware Accelerator -> GPU (Make sure to save)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OXn96M9deVtF" + }, + "outputs": [], + "source": [ + "#@title { display-mode: \"form\" }\n", + "#@markdown WebUI repo (and branch)\n", + "repo_name = \"Sygil-Dev/sygil-webui\" #@param {type:\"string\"}\n", + "repo_branch = \"dev\" #@param {type:\"string\"}\n", + "\n", + "#@markdown Mount Google Drive\n", + "mount_google_drive = True #@param {type:\"boolean\"}\n", + "save_outputs_to_drive = True #@param {type:\"boolean\"}\n", + "#@markdown Folder in Google Drive to search for custom models\n", + "MODEL_DIR = \"sygil-webui/models\" #@param {type:\"string\"}\n", + "\n", + "#@markdown Folder in Google Drive to look for custom config file (streamlit.yaml)\n", + "CONFIG_DIR = \"sygil-webui\" #@param {type:\"string\"}\n", + "\n", + "#@markdown Enter auth token from Huggingface.co\n", + "#@markdown >(required for downloading stable diffusion model.)\n", + "HF_TOKEN = \"\" #@param {type:\"string\"}\n", + "\n", + "#@markdown Select which models to prefetch\n", + "STABLE_DIFFUSION = True #@param {type:\"boolean\"}\n", + "WAIFU_DIFFUSION = False #@param {type:\"boolean\"}\n", + "TRINART_SD = False #@param {type:\"boolean\"}\n", + "SD_WD_LD_TRINART_MERGED = False #@param {type:\"boolean\"}\n", + "GFPGAN = True #@param {type:\"boolean\"}\n", + "REALESRGAN = True #@param {type:\"boolean\"}\n", + "LDSR = True #@param {type:\"boolean\"}\n", + "BLIP_MODEL = False #@param {type:\"boolean\"}\n", + "\n", + "#@markdown Save models to Google Drive for faster loading in future (Be warned! Make sure you have enough space!)\n", + "SAVE_MODELS = False #@param {type:\"boolean\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IZjJSr-WPNxB" + }, + "source": [ + "# Setup\n", + "\n", + ">Runtime will crash when installing conda. This is normal as we are forcing a restart of the runtime from code.\n", + "\n", + ">Just hit \"Run All\" again. 😑" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eq0-E5mjSpmP" + }, + "outputs": [], + "source": [ + "#@title Make sure we have access to GPU backend\n", + "!nvidia-smi -L" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cDu33xkdJ5mD" + }, + "outputs": [], + "source": [ + "#@title Install miniConda (mamba)\n", + "!pip install condacolab\n", + "import condacolab\n", + "condacolab.install_from_url(\"https://github.com/conda-forge/miniforge/releases/download/4.14.0-0/Mambaforge-4.14.0-0-Linux-x86_64.sh\")\n", + "\n", + "import condacolab\n", + "condacolab.check()\n", + "# The runtime will crash here!!! Don't panic! We planned for this, remember?"
 + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pZHGf03Vp305" + }, + "outputs": [], + "source": [ + "#@title Clone the webUI repo\n", + "import os\n", + "REPO_URL = f'https://github.com/{repo_name}'  # build the URL with an f-string, not os.path.join\n", + "PATH_TO_REPO = os.path.join('/content', repo_name.split('/')[1])\n", + "!git clone {REPO_URL}\n", + "%cd {PATH_TO_REPO}\n", + "!git checkout {repo_branch}\n", + "!git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dmN2igp5Yk3z" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!mamba install cudatoolkit=11.3 git numpy=1.22.3 pip=20.3 python=3.8.5 pytorch=1.11.0 scikit-image=0.19.2 torchvision=0.12.0 -y\n", + "!python --version\n", + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Nxaxfgo_F8Am" + }, + "outputs": [], + "source": [ + "#@title Install localtunnel to open Google's ports\n", + "!npm install localtunnel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pcSWo9Zkzbsf" + }, + "outputs": [], + "source": [ + "#@title Mount Google Drive (if selected)\n", + "if mount_google_drive:\n", + "    # Mount Google Drive so outputs and models can be stored there.\n", + "    from google.colab import drive\n", + "    drive.mount('/content/drive/', force_remount=True)\n", + "\n", + "if mount_google_drive and save_outputs_to_drive:\n", + "    # Redirect the outputs folder to Drive via a symlink.\n", + "    OUTPUT_PATH = os.path.join('/content/drive/MyDrive', repo_name.split('/')[1], 'outputs')\n", + "    os.makedirs(OUTPUT_PATH, exist_ok=True)\n", + "    link_path = os.path.join(PATH_TO_REPO, 'outputs')\n", + "    if not os.path.islink(link_path):\n", + "        # guard so a second \"Run All\" doesn't crash on an already-existing link\n", + "        os.symlink(OUTPUT_PATH, link_path, target_is_directory=True)\n" + ] + },
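 + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The pre-fetch cell below drives its downloads from the model_manager section of configs/webui/webui_streamlit.yaml. After yaml.safe_load, it expects dataMap['model_manager']['models'] to look roughly like the sketch below. The key names come from the code in the next cell; the concrete values are illustrative only:\n", + "```python\n", + "# Hedged sketch of the structure the download loop expects; the values are made up.\n", + "models = {\n", + "    'stable_diffusion': {\n", + "        'model_name': 'Stable Diffusion',  # display name\n", + "        'save_location': './models/ldm/stable-diffusion-v1',\n", + "        'files': {\n", + "            'model_ckpt': {\n", + "                'file_name': 'model.ckpt',\n", + "                'download_link': 'https://huggingface.co/<repo>/resolve/main/model.ckpt',\n", + "                # an optional per-file 'save_location' overrides the model-level one\n", + "            },\n", + "        },\n", + "    },\n", + "}\n", + "```" + ] + },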
 + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vMdmh81J70yA" + }, + "outputs": [], + "source": [ + "#@title Pre-fetch models\n", + "%cd {PATH_TO_REPO}\n", + "# build the set of models selected in the config form\n", + "model_list = {\n", + "    'stable_diffusion': STABLE_DIFFUSION,\n", + "    'waifu_diffusion': WAIFU_DIFFUSION,\n", + "    'trinart_stable_diffusion': TRINART_SD,\n", + "    'sd_wd_ld_trinart_merged': SD_WD_LD_TRINART_MERGED,\n", + "    'gfpgan': GFPGAN,\n", + "    'realesrgan': REALESRGAN,\n", + "    'ldsr': LDSR,\n", + "    'blip_model': BLIP_MODEL}\n", + "download_list = {k for (k, v) in model_list.items() if v}\n", + "\n", + "# get model info (file name, download link, save location)\n", + "import yaml\n", + "with open('configs/webui/webui_streamlit.yaml') as f:\n", + "    dataMap = yaml.safe_load(f)\n", + "models = dataMap['model_manager']['models']\n", + "existing_models = []\n", + "\n", + "# download helper adapted from the model manager\n", + "import requests, time, shutil\n", + "from requests.auth import HTTPBasicAuth\n", + "\n", + "if MODEL_DIR != \"\":\n", + "    MODEL_DIR = os.path.join('/content/drive/MyDrive', MODEL_DIR)\n", + "else:\n", + "    MODEL_DIR = '/content/drive/MyDrive'\n", + "\n", + "def download_file(file_name, file_path, file_url):\n", + "    os.makedirs(file_path, exist_ok=True)\n", + "    link_path = os.path.join(MODEL_DIR, file_name)\n", + "    full_path = os.path.join(file_path, file_name)\n", + "    if os.path.exists(link_path):\n", + "        # the file is already in Google Drive: just link to it\n", + "        print(file_name + \" found in Google Drive\")\n", + "        if not os.path.exists(full_path):\n", + "            print(\"  creating symlink...\")\n", + "            os.symlink(link_path, full_path)\n", + "        else:\n", + "            print(\"  symlink already exists\")\n", + "    elif not os.path.exists(full_path):\n", + "        print(\"Downloading \" + file_name + \"...\", end=\"\")\n", + "        token = None\n", + "        if \"huggingface.co\" in file_url:\n", + "            token = HTTPBasicAuth('token', HF_TOKEN)\n", + "        try:\n", + "            with requests.get(file_url, auth=token, stream=True) as r:\n", + "                starttime = time.time()\n", + "                r.raise_for_status()\n", + "                with open(full_path, 'wb') as f:\n", + "                    for chunk in r.iter_content(chunk_size=8192):\n", + "                        f.write(chunk)\n", + "                        # print a progress dot every couple of seconds\n", + "                        if time.time() - starttime > 2:\n", + "                            starttime = time.time()\n", + "                            print(\".\", end=\"\")\n", + "            print(\"done\")\n", + "            print(\"  \" + file_name + \" downloaded to '\" + file_path + \"'\")\n", + "            if SAVE_MODELS and os.path.exists(MODEL_DIR):\n", + "                shutil.copy2(full_path, MODEL_DIR)\n", + "                print(\"  Saved \" + file_name + \" to \" + MODEL_DIR)\n", + "        except Exception as e:\n", + "            print(\"Failed to download \" + file_name + \": \" + str(e))\n", + "            return\n", + "    else:\n", + "        print(full_path + \" already exists.\")\n", + "        existing_models.append(file_name)\n", + "\n", + "# download the selected models\n", + "for model in download_list:\n", + "    model_name = models[model]['model_name']\n", + "    file_info = models[model]['files']\n", + "    for file in file_info:\n", + "        file_name = file_info[file]['file_name']\n", + "        file_url = file_info[file]['download_link']\n", + "        if 'save_location' in file_info[file]:\n", + "            file_path = file_info[file]['save_location']\n", + "        else:\n", + "            file_path = models[model]['save_location']\n", + "        download_file(file_name, file_path, file_url)\n", + "\n", + "# link custom models from Drive that are not in the config list\n", + "CUSTOM_MODEL_DIR = os.path.join(PATH_TO_REPO, 'models/custom')\n", + "if os.path.exists(MODEL_DIR):\n", + "    custom_models = [m for m in os.listdir(MODEL_DIR) if os.path.isfile(os.path.join(MODEL_DIR, m))]\n", + "    os.makedirs(CUSTOM_MODEL_DIR, exist_ok=True)\n", + "    print(\"Custom model(s) found:\")\n", + "    for m in custom_models:\n", + "        if m in existing_models:\n", + "            continue\n", + "        full_path = os.path.join(CUSTOM_MODEL_DIR, m)\n", + "        if not os.path.exists(full_path):\n", + "            print(\"  \" + m)\n", + "            os.symlink(os.path.join(MODEL_DIR, m), full_path)\n", + "\n", + "# link a custom config file from Drive if one exists\n", + "if CONFIG_DIR != \"\":\n", + "    CONFIG_FILE = os.path.join('/content/drive/MyDrive', CONFIG_DIR, 'userconfig_streamlit.yaml')\n", + "    config_location = os.path.join(PATH_TO_REPO, 'configs/webui/userconfig_streamlit.yaml')\n", + "    if os.path.exists(CONFIG_FILE) and not os.path.exists(config_location):\n", + "        # symlink the user config file itself into the repo\n", + "        os.symlink(CONFIG_FILE, config_location)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pjIjiCuJysJI" + }, + "source": [ + "# Launch the webui server\n", + "### (optional) JS to prevent idle timeout:\n", + "Press F12 (or Ctrl+Shift+I), or right-click this page and choose Inspect. Then open the Console tab and paste in the following code.\n", + "```js\n", + "function ClickConnect(){\n", + "    console.log(\"Working\");\n", + "    document.querySelector(\"colab-toolbar-button#connect\").click()\n", + "}\n", + "setInterval(ClickConnect, 60000)\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-WknaU7uu_q6" + }, + "outputs": [], + "source": [ + "#@title Press play on the music player to keep the tab alive (Uses only 13MB of data)\n", + "%%html\n", + "Press play on the music player to keep the tab alive, then start your generation below (Uses only 13MB of data)
\n", + "