From d7f396486e3e9b4dd31020c81c6eb446593b586d Mon Sep 17 00:00:00 2001 From: Ronen Schaffer Date: Thu, 22 Feb 2024 04:18:37 +0200 Subject: [PATCH 001/196] Update comment (#2934) --- benchmarks/benchmark_serving.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index cdcfb8582143c..ff5609c37febf 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -7,7 +7,7 @@ --disable-log-requests (TGI backend) - ./launch_hf_server.sh + ./launch_tgi_server.sh On the client side, run: python benchmarks/benchmark_serving.py \ From 5574081c49c9a5ac51662981aff80250119a97bd Mon Sep 17 00:00:00 2001 From: Mustafa Eyceoz Date: Wed, 21 Feb 2024 21:24:01 -0500 Subject: [PATCH 002/196] Added early stopping to completion APIs (#2939) --- vllm/entrypoints/openai/protocol.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 727fec870293c..7c2aa707775ff 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -72,6 +72,7 @@ class ChatCompletionRequest(BaseModel): top_k: Optional[int] = -1 ignore_eos: Optional[bool] = False use_beam_search: Optional[bool] = False + early_stopping: Optional[bool] = False stop_token_ids: Optional[List[int]] = Field(default_factory=list) skip_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True @@ -99,6 +100,7 @@ def to_sampling_params(self) -> SamplingParams: top_k=self.top_k, ignore_eos=self.ignore_eos, use_beam_search=self.use_beam_search, + early_stopping=self.early_stopping, skip_special_tokens=self.skip_special_tokens, spaces_between_special_tokens=self.spaces_between_special_tokens, include_stop_str_in_output=self.include_stop_str_in_output, @@ -129,6 +131,7 @@ class CompletionRequest(BaseModel): top_k: Optional[int] = -1 ignore_eos: Optional[bool] = False use_beam_search: Optional[bool] = False + early_stopping: Optional[bool] = False stop_token_ids: Optional[List[int]] = Field(default_factory=list) skip_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True @@ -157,6 +160,7 @@ def to_sampling_params(self): max_tokens=self.max_tokens if not echo_without_generation else 1, logprobs=self.logprobs, use_beam_search=self.use_beam_search, + early_stopping=self.early_stopping, prompt_logprobs=self.logprobs if self.echo else None, skip_special_tokens=self.skip_special_tokens, spaces_between_special_tokens=(self.spaces_between_special_tokens), From 344020c926ad19d9d147f5ab6b8929669296edcb Mon Sep 17 00:00:00 2001 From: Roy Date: Thu, 22 Feb 2024 10:25:05 +0800 Subject: [PATCH 003/196] Migrate MistralForCausalLM to LlamaForCausalLM (#2868) --- vllm/model_executor/models/__init__.py | 2 +- vllm/model_executor/models/llama.py | 6 +- vllm/model_executor/models/mistral.py | 377 ------------------------- 3 files changed, 6 insertions(+), 379 deletions(-) delete mode 100644 vllm/model_executor/models/mistral.py diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 17d8d69ba8672..411814f2f5d09 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -30,7 +30,7 @@ "LlamaForCausalLM": ("llama", "LlamaForCausalLM"), # For decapoda-research/llama-* "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"), - "MistralForCausalLM": ("mistral", "MistralForCausalLM"), + "MistralForCausalLM": ("llama", 
"LlamaForCausalLM"), "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"), "QuantMixtralForCausalLM": ("mixtral_quant", "MixtralForCausalLM"), # transformers's mpt class has lower case diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 1d0353d7d396e..b7f6b8f3ec374 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -92,6 +92,7 @@ def __init__( max_position_embeddings: int = 8192, linear_method: Optional[LinearMethodBase] = None, bias: bool = False, + sliding_window: Optional[int] = None, ) -> None: super().__init__() self.hidden_size = hidden_size @@ -141,7 +142,8 @@ def __init__( self.attn = PagedAttention(self.num_heads, self.head_dim, self.scaling, - num_kv_heads=self.num_kv_heads) + num_kv_heads=self.num_kv_heads, + sliding_window=sliding_window) def forward( self, @@ -172,6 +174,7 @@ def __init__( rope_scaling = getattr(config, "rope_scaling", None) max_position_embeddings = getattr(config, "max_position_embeddings", 8192) + sliding_window = getattr(config, "sliding_window", None) self.self_attn = LlamaAttention( hidden_size=self.hidden_size, num_heads=config.num_attention_heads, @@ -182,6 +185,7 @@ def __init__( max_position_embeddings=max_position_embeddings, linear_method=linear_method, bias=getattr(config, "bias", False), + sliding_window=sliding_window, ) self.mlp = LlamaMLP( hidden_size=self.hidden_size, diff --git a/vllm/model_executor/models/mistral.py b/vllm/model_executor/models/mistral.py deleted file mode 100644 index 2347ed752d781..0000000000000 --- a/vllm/model_executor/models/mistral.py +++ /dev/null @@ -1,377 +0,0 @@ -# coding=utf-8 -# Adapted from -# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py -# Copyright 2023 The vLLM team. -# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. -# -# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX -# and OPT implementations in this library. It has been modified from its -# original forms to accommodate minor architectural differences compared -# to GPT-NeoX and OPT used by the Meta AI team that trained the model. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Inference-only Mistral model compatible with HuggingFace weights.""" -from typing import List, Optional, Tuple - -import torch -from torch import nn -from transformers import MistralConfig - -from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention -from vllm.model_executor.layers.layernorm import RMSNorm -from vllm.model_executor.layers.linear import (LinearMethodBase, - MergedColumnParallelLinear, - QKVParallelLinear, - RowParallelLinear) -from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.layers.vocab_parallel_embedding import ( - VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) -from vllm.model_executor.parallel_utils.parallel_state import ( - get_tensor_model_parallel_world_size) -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.weight_utils import (default_weight_loader, - hf_model_weights_iterator) -from vllm.sequence import SamplerOutput -from vllm.config import LoRAConfig - -KVCache = Tuple[torch.Tensor, torch.Tensor] - - -class MistralMLP(nn.Module): - - def __init__( - self, - hidden_size: int, - intermediate_size: int, - hidden_act: str, - linear_method: Optional[LinearMethodBase] = None, - ) -> None: - super().__init__() - self.gate_up_proj = MergedColumnParallelLinear( - hidden_size, [intermediate_size] * 2, - bias=False, - linear_method=linear_method) - self.down_proj = RowParallelLinear(intermediate_size, - hidden_size, - bias=False, - linear_method=linear_method) - if hidden_act != "silu": - raise ValueError(f"Unsupported activation: {hidden_act}. " - "Only silu is supported for now.") - self.act_fn = SiluAndMul() - - def forward(self, x): - gate_up, _ = self.gate_up_proj(x) - x = self.act_fn(gate_up) - x, _ = self.down_proj(x) - return x - - -class MistralAttention(nn.Module): - - def __init__(self, - hidden_size: int, - num_heads: int, - num_kv_heads: int, - max_position: int = 4096 * 32, - rope_theta: float = 10000, - linear_method: Optional[LinearMethodBase] = None, - sliding_window: Optional[int] = None) -> None: - super().__init__() - self.hidden_size = hidden_size - tp_size = get_tensor_model_parallel_world_size() - self.total_num_heads = num_heads - assert self.total_num_heads % tp_size == 0 - self.num_heads = self.total_num_heads // tp_size - self.total_num_kv_heads = num_kv_heads - if self.total_num_kv_heads >= tp_size: - # Number of KV heads is greater than TP size, so we partition - # the KV heads across multiple tensor parallel GPUs. - assert self.total_num_kv_heads % tp_size == 0 - else: - # Number of KV heads is less than TP size, so we replicate - # the KV heads across multiple tensor parallel GPUs. 
- assert tp_size % self.total_num_kv_heads == 0 - self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) - self.head_dim = hidden_size // self.total_num_heads - self.q_size = self.num_heads * self.head_dim - self.kv_size = self.num_kv_heads * self.head_dim - self.scaling = self.head_dim**-0.5 - self.rope_theta = rope_theta - self.sliding_window = sliding_window - - self.qkv_proj = QKVParallelLinear( - hidden_size, - self.head_dim, - self.total_num_heads, - self.total_num_kv_heads, - bias=False, - linear_method=linear_method, - ) - self.o_proj = RowParallelLinear( - self.total_num_heads * self.head_dim, - hidden_size, - bias=False, - linear_method=linear_method, - ) - - self.rotary_emb = get_rope( - self.head_dim, - rotary_dim=self.head_dim, - max_position=max_position, - base=self.rope_theta, - ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads, - sliding_window=self.sliding_window) - - def forward( - self, - positions: torch.Tensor, - hidden_states: torch.Tensor, - kv_cache: KVCache, - input_metadata: InputMetadata, - ) -> torch.Tensor: - qkv, _ = self.qkv_proj(hidden_states) - q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) - q, k = self.rotary_emb(positions, q, k) - k_cache, v_cache = kv_cache - attn_output = self.attn(q, k, v, k_cache, v_cache, input_metadata) - output, _ = self.o_proj(attn_output) - return output - - -class MistralDecoderLayer(nn.Module): - - def __init__( - self, - config: MistralConfig, - linear_method: Optional[LinearMethodBase] = None, - ) -> None: - super().__init__() - self.hidden_size = config.hidden_size - # Requires transformers > 4.32.0 - rope_theta = getattr(config, "rope_theta", 10000) - self.self_attn = MistralAttention( - hidden_size=self.hidden_size, - num_heads=config.num_attention_heads, - max_position=config.max_position_embeddings, - num_kv_heads=config.num_key_value_heads, - rope_theta=rope_theta, - linear_method=linear_method, - sliding_window=config.sliding_window) - self.mlp = MistralMLP( - hidden_size=self.hidden_size, - intermediate_size=config.intermediate_size, - hidden_act=config.hidden_act, - linear_method=linear_method, - ) - self.input_layernorm = RMSNorm(config.hidden_size, - eps=config.rms_norm_eps) - self.post_attention_layernorm = RMSNorm(config.hidden_size, - eps=config.rms_norm_eps) - - def forward( - self, - positions: torch.Tensor, - hidden_states: torch.Tensor, - kv_cache: KVCache, - input_metadata: InputMetadata, - residual: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, torch.Tensor]: - # Self Attention - if residual is None: - residual = hidden_states - hidden_states = self.input_layernorm(hidden_states) - else: - hidden_states, residual = self.input_layernorm( - hidden_states, residual) - hidden_states = self.self_attn( - positions=positions, - hidden_states=hidden_states, - kv_cache=kv_cache, - input_metadata=input_metadata, - ) - - # Fully Connected - hidden_states, residual = self.post_attention_layernorm( - hidden_states, residual) - hidden_states = self.mlp(hidden_states) - return hidden_states, residual - - -class MistralModel(nn.Module): - - def __init__( - self, - config: MistralConfig, - linear_method: Optional[LinearMethodBase] = None, - lora_config: Optional[LoRAConfig] = None, - ) -> None: - super().__init__() - self.config = config - self.padding_idx = config.pad_token_id - lora_vocab = (lora_config.lora_extra_vocab_size * - (lora_config.max_loras or 1)) if lora_config else 0 - self.vocab_size = config.vocab_size + 
lora_vocab - self.org_vocab_size = config.vocab_size - - self.embed_tokens = VocabParallelEmbedding( - self.vocab_size, - config.hidden_size, - org_num_embeddings=config.vocab_size, - ) - self.layers = nn.ModuleList([ - MistralDecoderLayer(config, linear_method) - for _ in range(config.num_hidden_layers) - ]) - self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> torch.Tensor: - hidden_states = self.embed_tokens(input_ids) - residual = None - for i in range(len(self.layers)): - layer = self.layers[i] - hidden_states, residual = layer( - positions, - hidden_states, - kv_caches[i], - input_metadata, - residual, - ) - hidden_states, _ = self.norm(hidden_states, residual) - return hidden_states - - -class MistralForCausalLM(nn.Module): - packed_modules_mapping = { - "qkv_proj": [ - "q_proj", - "k_proj", - "v_proj", - ], - "gate_up_proj": [ - "gate_proj", - "up_proj", - ], - } - - # LoRA specific attributes - supported_lora_modules = [ - "qkv_proj", - "o_proj", - "gate_up_proj", - "down_proj", - "embed_tokens", - "lm_head", - ] - embedding_modules = { - "embed_tokens": "input_embeddings", - "lm_head": "output_embeddings", - } - embedding_padding_modules = ["lm_head"] - - def __init__( - self, - config: MistralConfig, - linear_method: Optional[LinearMethodBase] = None, - lora_config: Optional[LoRAConfig] = None, - ) -> None: - super().__init__() - self.config = config - self.linear_method = linear_method - self.model = MistralModel(config, - linear_method, - lora_config=lora_config) - unpadded_vocab_size = config.vocab_size - if lora_config: - unpadded_vocab_size += lora_config.lora_extra_vocab_size - self.lm_head = ParallelLMHead( - unpadded_vocab_size, - config.hidden_size, - org_num_embeddings=config.vocab_size, - padding_size=DEFAULT_VOCAB_PADDING_SIZE - # We need bigger padding if using lora for kernel - # compatibility - if not lora_config else lora_config.lora_vocab_padding_size, - ) - self.sampler = Sampler(unpadded_vocab_size, config.vocab_size) - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> torch.Tensor: - hidden_states = self.model(input_ids, positions, kv_caches, - input_metadata) - return hidden_states - - def sample( - self, - hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) - return next_tokens - - def load_weights(self, - model_name_or_path: str, - cache_dir: Optional[str] = None, - load_format: str = "auto", - revision: Optional[str] = None): - stacked_params_mapping = [ - # (param_name, shard_name, shard_id) - ("qkv_proj", "q_proj", "q"), - ("qkv_proj", "k_proj", "k"), - ("qkv_proj", "v_proj", "v"), - ("gate_up_proj", "gate_proj", 0), - ("gate_up_proj", "up_proj", 1), - ] - params_dict = dict(self.named_parameters()) - for name, loaded_weight in hf_model_weights_iterator( - model_name_or_path, cache_dir, load_format, revision): - if "rotary_emb.inv_freq" in name: - continue - for (param_name, weight_name, shard_id) in stacked_params_mapping: - if weight_name not in name: - continue - name = name.replace(weight_name, param_name) - # Skip loading extra bias for GPTQ models. 
- if name.endswith(".bias") and name not in params_dict: - continue - param = params_dict[name] - weight_loader = param.weight_loader - weight_loader(param, loaded_weight, shard_id) - break - else: - # Skip loading extra bias for GPTQ models. - if name.endswith(".bias") and name not in params_dict: - continue - param = params_dict[name] - weight_loader = getattr(param, "weight_loader", - default_weight_loader) - weight_loader(param, loaded_weight) From 95529e32537287831cddd800280a20d7c2417163 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 21 Feb 2024 18:28:23 -0800 Subject: [PATCH 004/196] Use Llama RMSNorm custom op for Gemma (#2974) --- vllm/model_executor/models/gemma.py | 60 +++++++++++++---------------- 1 file changed, 27 insertions(+), 33 deletions(-) diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index affe54c448a2c..03bd149c001d3 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -22,6 +22,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -40,21 +41,6 @@ KVCache = Tuple[torch.Tensor, torch.Tensor] -class GemmaRMSNorm(nn.Module): - - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - self.weight = nn.Parameter(torch.zeros(dim)) - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * (1 + self.weight) - - class GemmaMLP(nn.Module): def __init__( @@ -185,10 +171,10 @@ def __init__( intermediate_size=config.intermediate_size, linear_method=linear_method, ) - self.input_layernorm = GemmaRMSNorm(config.hidden_size, - eps=config.rms_norm_eps) - self.post_attention_layernorm = GemmaRMSNorm(config.hidden_size, - eps=config.rms_norm_eps) + self.input_layernorm = RMSNorm(config.hidden_size, + eps=config.rms_norm_eps) + self.post_attention_layernorm = RMSNorm(config.hidden_size, + eps=config.rms_norm_eps) def forward( self, @@ -196,25 +182,27 @@ def forward( hidden_states: torch.Tensor, kv_cache: KVCache, input_metadata: InputMetadata, + residual: Optional[torch.Tensor], ) -> Tuple[torch.Tensor, torch.Tensor]: # Self Attention - residual = hidden_states - hidden_states = self.input_layernorm(hidden_states) + if residual is None: + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + else: + hidden_states, residual = self.input_layernorm( + hidden_states, residual) hidden_states = self.self_attn( positions=positions, hidden_states=hidden_states, kv_cache=kv_cache, input_metadata=input_metadata, ) - hidden_states = residual + hidden_states # Fully Connected - residual = hidden_states - hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states, residual = self.post_attention_layernorm( + hidden_states, residual) hidden_states = self.mlp(hidden_states) - hidden_states = residual + hidden_states - - return hidden_states + return hidden_states, residual class GemmaModel(nn.Module): @@ -235,7 +223,7 @@ def __init__( GemmaDecoderLayer(config, linear_method) for _ in range(config.num_hidden_layers) ]) - self.norm = GemmaRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) def forward( self, @@ 
-246,17 +234,19 @@ def forward( ) -> torch.Tensor: hidden_states = self.embed_tokens(input_ids) # Normalize the embedding by sqrt(hidden_size) - hidden_states = hidden_states * (self.config.hidden_size**0.5) + hidden_states *= self.config.hidden_size**0.5 + residual = None for i in range(len(self.layers)): layer = self.layers[i] - hidden_states = layer( + hidden_states, residual = layer( positions, hidden_states, kv_caches[i], input_metadata, + residual, ) - hidden_states = self.norm(hidden_states) + hidden_states, _ = self.norm(hidden_states, residual) return hidden_states @@ -321,6 +311,10 @@ def load_weights(self, # Skip loading extra layer for lora models. if "lm_head" in name: continue + # GemmaRMSNorm is different from Llama's in that it multiplies + # (1 + weight) to the output, instead of just weight. + if "norm.weight" in name: + loaded_weight += 1.0 param = params_dict[name] weight_loader = getattr(param, "weight_loader", default_weight_loader) @@ -329,5 +323,5 @@ def load_weights(self, unloaded_params = params_dict.keys() - loaded_params if unloaded_params: raise RuntimeError( - f"Some weights are not initialized from checkpoints: {unloaded_params}" - ) + "Some weights are not initialized from checkpoints: " + f"{unloaded_params}") From 93dc5a287086299a124e9f1f6fac75458ae0acbd Mon Sep 17 00:00:00 2001 From: Massimiliano Pronesti Date: Thu, 22 Feb 2024 02:56:01 +0000 Subject: [PATCH 005/196] chore(vllm): codespell for spell checking (#2820) --- .github/workflows/ruff.yml | 5 +- benchmarks/benchmark_serving.py | 2 +- format.sh | 51 +++++++++++++++++-- mypy.ini | 8 --- pyproject.toml | 18 +++++++ requirements-dev.txt | 2 + tests/lora/test_layers.py | 2 +- tests/lora/test_llama.py | 4 +- vllm/core/block_manager.py | 2 +- vllm/core/scheduler.py | 2 +- vllm/lora/punica.py | 2 +- .../layers/triton_kernel/prefix_prefill.py | 2 +- vllm/model_executor/models/decilm.py | 2 +- .../parallel_utils/custom_all_reduce.py | 4 +- .../parallel_utils/parallel_state.py | 2 +- vllm/utils.py | 2 +- 16 files changed, 85 insertions(+), 25 deletions(-) delete mode 100644 mypy.ini diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index bd38d11872dc4..8f8f5ee3cc70c 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -25,7 +25,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install ruff==0.1.5 + pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1 - name: Analysing the code with ruff run: | ruff vllm tests + - name: Spelling check with codespell + run: | + codespell --toml pyproject.toml \ No newline at end of file diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index ff5609c37febf..7d389a9c7d703 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -375,7 +375,7 @@ def main(args: argparse.Namespace): parser.add_argument( "--disable-tqdm", action="store_true", - help="Specify to disbale tqdm progress bar.", + help="Specify to disable tqdm progress bar.", ) parser.add_argument( "--save-result", diff --git a/format.sh b/format.sh index c78108869659d..eb2c5ab031626 100755 --- a/format.sh +++ b/format.sh @@ -24,6 +24,7 @@ builtin cd "$ROOT" || exit 1 YAPF_VERSION=$(yapf --version | awk '{print $2}') RUFF_VERSION=$(ruff --version | awk '{print $2}') MYPY_VERSION=$(mypy --version | awk '{print $2}') +CODESPELL_VERSION=$(codespell --version) # # params: tool name, tool version, required version tool_version_check() { @@ -36,6 +37,7 @@ tool_version_check() { 
tool_version_check "yapf" $YAPF_VERSION "$(grep yapf requirements-dev.txt | cut -d'=' -f3)" tool_version_check "ruff" $RUFF_VERSION "$(grep "ruff==" requirements-dev.txt | cut -d'=' -f3)" tool_version_check "mypy" "$MYPY_VERSION" "$(grep mypy requirements-dev.txt | cut -d'=' -f3)" +tool_version_check "codespell" "$CODESPELL_VERSION" "$(grep codespell requirements-dev.txt | cut -d'=' -f3)" YAPF_FLAGS=( '--recursive' @@ -93,6 +95,47 @@ echo 'vLLM yapf: Done' # echo 'vLLM mypy:' # mypy +# check spelling of specified files +spell_check() { + codespell "$@" +} + +spell_check_all(){ + codespell --toml pyproject.toml +} + +# Spelling check of files that differ from main branch. +spell_check_changed() { + # The `if` guard ensures that the list of filenames is not empty, which + # could cause ruff to receive 0 positional arguments, making it hang + # waiting for STDIN. + # + # `diff-filter=ACM` and $MERGEBASE is to ensure we only lint files that + # exist on both branches. + MERGEBASE="$(git merge-base origin/main HEAD)" + + if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then + git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ + codespell + fi +} + +# Run Codespell +## This flag runs spell check of individual files. --files *must* be the first command line +## arg to use this option. +if [[ "$1" == '--files' ]]; then + spell_check "${@:2}" + # If `--all` is passed, then any further arguments are ignored and the + # entire python directory is linted. +elif [[ "$1" == '--all' ]]; then + spell_check_all +else + # Check spelling only of the files that changed in last commit. + spell_check_changed +fi +echo 'vLLM codespell: Done' + + # Lint specified files lint() { ruff "$@" @@ -117,9 +160,9 @@ lint_changed() { } # Run Ruff -echo 'vLLM Ruff:' -## This flag lints individual files. --files *must* be the first command line -## arg to use this option. +echo 'vLLM ruff:' +### This flag lints individual files. --files *must* be the first command line +### arg to use this option. if [[ "$1" == '--files' ]]; then lint "${@:2}" # If `--all` is passed, then any further arguments are ignored and the @@ -139,3 +182,5 @@ if ! git diff --quiet &>/dev/null; then exit 1 fi + + diff --git a/mypy.ini b/mypy.ini deleted file mode 100644 index 55c4248ea9d26..0000000000000 --- a/mypy.ini +++ /dev/null @@ -1,8 +0,0 @@ -[mypy] -python_version = 3.8 - -ignore_missing_imports = True - -files = vllm -# TODO(woosuk): Include the code from Megatron and HuggingFace. -exclude = vllm/model_executor/parallel_utils/|vllm/model_executor/models/ diff --git a/pyproject.toml b/pyproject.toml index b197256f6ff55..c5db016cebdb7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,4 +31,22 @@ ignore = [ "E731", # line too long, handled by black formatting "E501", + # .strip() with multi-character strings + "B005", + # Loop control variable not used within loop body + "B007", ] + +[tool.mypy] +python_version = "3.8" + +ignore_missing_imports = true + +files = "vllm" +# TODO(woosuk): Include the code from Megatron and HuggingFace. 
+exclude = "vllm/model_executor/parallel_utils/|vllm/model_executor/models/" + + +[tool.codespell] +ignore-words-list = "dout, te, indicies" +skip = "./tests/prompts" diff --git a/requirements-dev.txt b/requirements-dev.txt index f8126008d0794..b54a2773249cf 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,9 @@ # formatting yapf==0.32.0 toml==0.10.2 +tomli==2.0.1 ruff==0.1.5 +codespell==2.2.6 # type checking mypy==0.991 diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index f739bbeaab334..18ce300449dbf 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -279,7 +279,7 @@ def create_random_embedding_layer(): 256, org_num_embeddings=512) expanded_embedding.weight.data[:512, :] = embedding_data - # We need to deepcopy the embedding as it will be modifed + # We need to deepcopy the embedding as it will be modified # in place lora_embedding = VocabParallelEmbeddingWithLoRA( deepcopy(expanded_embedding)) diff --git a/tests/lora/test_llama.py b/tests/lora/test_llama.py index 06fbf19eea824..dfaf8c700695a 100644 --- a/tests/lora/test_llama.py +++ b/tests/lora/test_llama.py @@ -15,7 +15,7 @@ def do_sample(llm, lora_path: str, lora_id: int): "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]", "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]" + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]" ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256, @@ -53,7 +53,7 @@ def test_llama_lora(sql_lora_files, tp_size): "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? 
[/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? ", - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the womens doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", + "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", ] expected_lora_output = [ " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 7f91051f03ac1..3946096d4296a 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -178,7 +178,7 @@ def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]: if len(block_table) < len(logical_blocks): if (self.block_sliding_window and len(block_table) >= self.block_sliding_window): - # re-use a block + # reuse a block block_table.append(block_table[len(block_table) % self.block_sliding_window]) else: diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index f4ac2d6dc59fe..5e7cc3091d775 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -158,7 +158,7 @@ def get_num_unfinished_seq_groups(self) -> int: return len(self.waiting) + len(self.running) + len(self.swapped) def _schedule(self) -> SchedulerOutputs: - # Blocks that need to be swaped or copied before model execution. + # Blocks that need to be swapped or copied before model execution. 
blocks_to_swap_in: Dict[int, int] = {} blocks_to_swap_out: Dict[int, int] = {} blocks_to_copy: Dict[int, List[int]] = {} diff --git a/vllm/lora/punica.py b/vllm/lora/punica.py index 307a33dcf2820..fc74269e55876 100644 --- a/vllm/lora/punica.py +++ b/vllm/lora/punica.py @@ -87,7 +87,7 @@ def add_lora(y: torch.Tensor, r = wb_t_all.size(-1) if buffer is None: # We set the buffer to be float32 by default to avoid - # numerical innacuracies that would otherwise happen + # numerical inaccuracies that would otherwise happen # due to downcasting. buffer = torch.zeros((x.size(0), r), dtype=torch.float32, diff --git a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py index ba40d42307fab..a1a2ab0c4805c 100644 --- a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py +++ b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py @@ -537,7 +537,7 @@ def _fwd_kernel_alibi( alibi_start_q = tl.arange( 0, BLOCK_M) + block_start_loc + cur_batch_ctx_len alibi_start_k = cur_batch_ctx_len - # # init debuger + # # init debugger # offset_db_q = tl.arange(0, BLOCK_M) + block_start_loc # offset_db_k = tl.arange(0, BLOCK_N) # calc q[BLOCK_M, BLOCK_MODEL] mul k[prefix_len: , BLOCK_DMODEL] diff --git a/vllm/model_executor/models/decilm.py b/vllm/model_executor/models/decilm.py index 07aa4b72bf7a0..abf4a462871b0 100644 --- a/vllm/model_executor/models/decilm.py +++ b/vllm/model_executor/models/decilm.py @@ -41,7 +41,7 @@ class DeciLMForCausalLM(LlamaForCausalLM): Based on the llama executor. The main difference is that DeciLM uses Variable Grouped Query Attention. - The constant number of GQA heads in the decoder is overriden with a value + The constant number of GQA heads in the decoder is overridden with a value per layer. Usually, in the HuggingFace implementation, instead of diff --git a/vllm/model_executor/parallel_utils/custom_all_reduce.py b/vllm/model_executor/parallel_utils/custom_all_reduce.py index ce4c8d02f7694..0c749c0484fc5 100644 --- a/vllm/model_executor/parallel_utils/custom_all_reduce.py +++ b/vllm/model_executor/parallel_utils/custom_all_reduce.py @@ -36,14 +36,14 @@ def init_custom_ar() -> None: if world_size not in _SUPPORTED_WORLD_SIZES: logger.warn( "Custom allreduce is disabled due to an unsupported world size: " - "%d. Supported world sizes: %s. To slience this warning, specify" + "%d. Supported world sizes: %s. To silence this warning, specify" "disable_custom_all_reduce=True explicitly.", world_size, str(_SUPPORTED_WORLD_SIZES)) return if not _can_p2p(rank, world_size): logger.warn( "Custom allreduce is disabled because your platform lacks GPU P2P" - " capability. To slience this warning, specify" + " capability. 
To silence this warning, specify" "disable_custom_all_reduce=True explicitly.") return _CA_HANDLE = CustomAllreduce(rank, world_size) diff --git a/vllm/model_executor/parallel_utils/parallel_state.py b/vllm/model_executor/parallel_utils/parallel_state.py index aeb07f64c37dc..c821936d06e4e 100644 --- a/vllm/model_executor/parallel_utils/parallel_state.py +++ b/vllm/model_executor/parallel_utils/parallel_state.py @@ -189,7 +189,7 @@ def get_pipeline_model_parallel_next_rank(): def get_pipeline_model_parallel_prev_rank(): - """Return the global rank that preceeds the caller in the pipeline""" + """Return the global rank that precedes the caller in the pipeline""" assert _PIPELINE_GLOBAL_RANKS is not None, ( "Pipeline parallel group is not initialized") rank_in_pipeline = get_pipeline_model_parallel_rank() diff --git a/vllm/utils.py b/vllm/utils.py index d7a3a3a2a9ef9..6206879929061 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -204,7 +204,7 @@ def _generate_random_fp8_e5m2( # NOTE(zhaoyang): Due to NaN and Inf representation for fp8 data type, # it may occur Inf or NaN if we directly use torch.randint # to generate random data for fp8 data. - # For example, s.11111.00 in fp8e5m2 format repesents Inf. + # For example, s.11111.00 in fp8e5m2 format represents Inf. # | E4M3 | E5M2 #-----|-------------|------------------- # Inf | N/A | s.11111.00 From fd5dcc5c816b7392821d3d4c02b13a7cf820d962 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 21 Feb 2024 20:17:52 -0800 Subject: [PATCH 006/196] Optimize GeGLU layer in Gemma (#2975) --- csrc/activation_kernels.cu | 73 ++++++++++++++++-------- csrc/ops.h | 4 ++ csrc/pybind.cpp | 4 ++ tests/kernels/test_activation.py | 50 +++++----------- vllm/model_executor/layers/activation.py | 23 ++++++++ vllm/model_executor/models/gemma.py | 31 +++++----- 6 files changed, 108 insertions(+), 77 deletions(-) diff --git a/csrc/activation_kernels.cu b/csrc/activation_kernels.cu index 5ba9ab178d5a4..22b10f0571d1c 100644 --- a/csrc/activation_kernels.cu +++ b/csrc/activation_kernels.cu @@ -2,19 +2,16 @@ #include #include +#include + #include "cuda_compat.h" #include "dispatch_utils.h" namespace vllm { -template -__device__ __forceinline__ T silu(const T& x) { - // x * sigmoid(x) - return (T) (((float) x) / (1.0f + expf((float) -x))); -} - -template -__global__ void silu_and_mul_kernel( +// Activation and gating kernel template. +template +__global__ void act_and_mul_kernel( scalar_t* __restrict__ out, // [..., d] const scalar_t* __restrict__ input, // [..., 2, d] const int d) { @@ -22,32 +19,58 @@ __global__ void silu_and_mul_kernel( for (int64_t idx = threadIdx.x; idx < d; idx += blockDim.x) { const scalar_t x = VLLM_LDG(&input[token_idx * 2 * d + idx]); const scalar_t y = VLLM_LDG(&input[token_idx * 2 * d + d + idx]); - out[token_idx * d + idx] = silu(x) * y; + out[token_idx * d + idx] = ACT_FN(x) * y; } } +template +__device__ __forceinline__ T silu_kernel(const T& x) { + // x * sigmoid(x) + return (T) (((float) x) / (1.0f + expf((float) -x))); +} + +template +__device__ __forceinline__ T gelu_kernel(const T& x) { + // Equivalent to PyTorch GELU with 'none' approximation. + // Refer to: + // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L38 + const float f = (float) x; + constexpr float ALPHA = M_SQRT1_2; + return (T) (f * 0.5f * (1.0f + ::erf(f * ALPHA))); +} + } // namespace vllm +// Launch activation and gating kernel. 
+#define LAUNCH_ACTIVATION_GATE_KERNEL(KERNEL) \ + int d = input.size(-1) / 2; \ + int64_t num_tokens = input.numel() / input.size(-1); \ + dim3 grid(num_tokens); \ + dim3 block(std::min(d, 1024)); \ + const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); \ + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); \ + VLLM_DISPATCH_FLOATING_TYPES( \ + input.scalar_type(), \ + "act_and_mul_kernel", \ + [&] { \ + vllm::act_and_mul_kernel><<>>( \ + out.data_ptr(), \ + input.data_ptr(), \ + d); \ + }); + void silu_and_mul( torch::Tensor& out, // [..., d] torch::Tensor& input) // [..., 2 * d] { - int64_t num_tokens = input.numel() / input.size(-1); - int d = input.size(-1) / 2; - - dim3 grid(num_tokens); - dim3 block(std::min(d, 1024)); - const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); - const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - VLLM_DISPATCH_FLOATING_TYPES( - input.scalar_type(), - "silu_and_mul_kernel", - [&] { - vllm::silu_and_mul_kernel<<>>( - out.data_ptr(), - input.data_ptr(), - d); - }); + LAUNCH_ACTIVATION_GATE_KERNEL(vllm::silu_kernel); +} + +void gelu_and_mul( + torch::Tensor& out, // [..., d] + torch::Tensor& input) // [..., 2 * d] +{ + LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_kernel); } namespace vllm { diff --git a/csrc/ops.h b/csrc/ops.h index 2bcd0c2efc5c6..dbdd2c2c57945 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -57,6 +57,10 @@ void silu_and_mul( torch::Tensor& out, torch::Tensor& input); +void gelu_and_mul( + torch::Tensor& out, + torch::Tensor& input); + void gelu_new( torch::Tensor& out, torch::Tensor& input); diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index b36d259697167..24c22020131e8 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -22,6 +22,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { "silu_and_mul", &silu_and_mul, "Activation function used in SwiGLU."); + ops.def( + "gelu_and_mul", + &gelu_and_mul, + "Activation function used in GeGLU."); ops.def( "gelu_new", &gelu_new, diff --git a/tests/kernels/test_activation.py b/tests/kernels/test_activation.py index 8e216c293f070..e0dec144eba11 100644 --- a/tests/kernels/test_activation.py +++ b/tests/kernels/test_activation.py @@ -1,7 +1,10 @@ +from typing import Type + import pytest import torch -from vllm.model_executor.layers.activation import FastGELU, NewGELU, SiluAndMul +from vllm.model_executor.layers.activation import (FastGELU, GeluAndMul, + NewGELU, SiluAndMul) from allclose_default import get_default_atol, get_default_rtol DTYPES = [torch.half, torch.bfloat16, torch.float] @@ -13,13 +16,15 @@ ] +@pytest.mark.parametrize("activation", [SiluAndMul, GeluAndMul]) @pytest.mark.parametrize("num_tokens", NUM_TOKENS) @pytest.mark.parametrize("d", D) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @torch.inference_mode() -def test_silu_and_mul( +def test_act_and_mul( + activation: Type[torch.nn.Module], num_tokens: int, d: int, dtype: torch.dtype, @@ -31,48 +36,23 @@ def test_silu_and_mul( torch.cuda.manual_seed(seed) torch.set_default_device(device) x = torch.randn(num_tokens, 2 * d, dtype=dtype) - layer = SiluAndMul() + layer = activation() out = layer(x) ref_out = layer._forward(x) - assert torch.allclose(out, - ref_out, - atol=get_default_atol(out), - rtol=get_default_rtol(out)) + # The SiLU and GELU implementations are equivalent to the native PyTorch + # implementations, so we can do exact comparison. 
+ assert torch.allclose(out, ref_out, atol=0.0, rtol=0.0) +@pytest.mark.parametrize("activation", [FastGELU, NewGELU]) @pytest.mark.parametrize("num_tokens", NUM_TOKENS) @pytest.mark.parametrize("d", D) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("seed", SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) @torch.inference_mode() -def test_gelu_new( - num_tokens: int, - d: int, - dtype: torch.dtype, - seed: int, - device: str, -) -> None: - torch.random.manual_seed(seed) - if torch.cuda.is_available(): - torch.cuda.manual_seed(seed) - torch.set_default_device(device) - x = torch.randn(num_tokens, d, dtype=dtype) - layer = NewGELU() - out = layer(x) - ref_out = layer._forward(x) - assert torch.allclose(out, - ref_out, - atol=get_default_atol(out), - rtol=get_default_rtol(out)) - - -@pytest.mark.parametrize("num_tokens", NUM_TOKENS) -@pytest.mark.parametrize("d", D) -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("seed", SEEDS) -@pytest.mark.parametrize("device", CUDA_DEVICES) -def test_gelu_fast( +def test_activation( + activation: Type[torch.nn.Module], num_tokens: int, d: int, dtype: torch.dtype, @@ -84,7 +64,7 @@ def test_gelu_fast( torch.cuda.manual_seed(seed) torch.set_default_device(device) x = torch.randn(num_tokens, d, dtype=dtype) - layer = FastGELU() + layer = activation() out = layer(x) ref_out = layer._forward(x) assert torch.allclose(out, diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py index 95902ae38e256..5a3a7b2dbaee7 100644 --- a/vllm/model_executor/layers/activation.py +++ b/vllm/model_executor/layers/activation.py @@ -37,6 +37,29 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return out +class GeluAndMul(nn.Module): + """An activation function for GeGLU. + + The function computes x -> GELU(x[:d]) * x[d:] where d = x.shape[-1] // 2. 
+ + Shapes: + x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) + return: (batch_size, seq_len, d) or (num_tokens, d) + """ + + def _forward(self, x: torch.Tensor) -> torch.Tensor: + """PyTorch-native implementation equivalent to forward().""" + d = x.shape[-1] // 2 + return F.gelu(x[..., :d]) * x[..., d:] + + def forward(self, x: torch.Tensor) -> torch.Tensor: + d = x.shape[-1] // 2 + output_shape = (x.shape[:-1] + (d, )) + out = torch.empty(output_shape, dtype=x.dtype, device=x.device) + ops.gelu_and_mul(out, x) + return out + + class NewGELU(nn.Module): def _forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 03bd149c001d3..d8b515993d8ff 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -21,10 +21,11 @@ from transformers import GemmaConfig from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.activation import GeluAndMul from vllm.model_executor.layers.attention import PagedAttention from vllm.model_executor.layers.layernorm import RMSNorm -from vllm.model_executor.layers.linear import (ColumnParallelLinear, - LinearMethodBase, +from vllm.model_executor.layers.linear import (LinearMethodBase, + MergedColumnParallelLinear, QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope @@ -50,27 +51,21 @@ def __init__( linear_method: Optional[LinearMethodBase] = None, ) -> None: super().__init__() - self.gate_proj = ColumnParallelLinear(hidden_size, - intermediate_size, - bias=False, - linear_method=linear_method) - self.up_proj = ColumnParallelLinear(hidden_size, - intermediate_size, - bias=False, - linear_method=linear_method) + self.gate_up_proj = MergedColumnParallelLinear( + hidden_size, [intermediate_size] * 2, + bias=False, + linear_method=linear_method) self.down_proj = RowParallelLinear(intermediate_size, hidden_size, bias=False, linear_method=linear_method) - self.act_fn = nn.GELU() + self.act_fn = GeluAndMul() def forward(self, x): - gate, _ = self.gate_proj(x) - gate = self.act_fn(gate) - up, _ = self.up_proj(x) - fuse = gate * up - outputs, _ = self.down_proj(fuse) - return outputs + gate_up, _ = self.gate_up_proj(x) + x = self.act_fn(gate_up) + x, _ = self.down_proj(x) + return x class GemmaAttention(nn.Module): @@ -294,6 +289,8 @@ def load_weights(self, ("qkv_proj", "q_proj", "q"), ("qkv_proj", "k_proj", "k"), ("qkv_proj", "v_proj", "v"), + ("gate_up_proj", "gate_proj", 0), + ("gate_up_proj", "up_proj", 1), ] params_dict = dict(self.named_parameters()) loaded_params = set() From c530e2cfe3b3d7e60130ff817cee7f3a395af232 Mon Sep 17 00:00:00 2001 From: 44670 <44670@users.noreply.github.com> Date: Thu, 22 Feb 2024 17:40:05 +0800 Subject: [PATCH 007/196] [FIX] Fix a bug in initializing Yarn RoPE (#2983) --- vllm/model_executor/layers/rotary_embedding.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py index 93ec5c12536fb..87068644112c0 100644 --- a/vllm/model_executor/layers/rotary_embedding.py +++ b/vllm/model_executor/layers/rotary_embedding.py @@ -245,13 +245,11 @@ def _yarn_find_correction_range(low_rot: int, def _yarn_linear_ramp_mask(low: float, high: float, dim: int, - dtype: torch.dtype, - device: torch.device) -> torch.Tensor: + dtype: torch.dtype) -> torch.Tensor: if low == high: high += 0.001 # Prevent singularity - linear_func = 
(torch.arange(dim, dtype=dtype, device=device) - - low) / (high - low) + linear_func = (torch.arange(dim, dtype=dtype) - low) / (high - low) ramp_func = torch.clamp(linear_func, 0, 1) return ramp_func From 6f32cddf1c795e74a47e84620462431154718f49 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 22 Feb 2024 09:58:29 -0800 Subject: [PATCH 008/196] Remove Flash Attention in test env (#2982) --- requirements-dev.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index b54a2773249cf..80d66530f47f0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -17,7 +17,6 @@ pytest-forked pytest-asyncio httpx einops # required for MPT -flash_attn # required for HuggingFace's llama implementation openai requests -ray \ No newline at end of file +ray From 4caf7044e052399f07089aa8f586d5bd641f7d53 Mon Sep 17 00:00:00 2001 From: Ronen Schaffer Date: Fri, 23 Feb 2024 00:00:12 +0200 Subject: [PATCH 009/196] Include tokens from prompt phase in `counter_generation_tokens` (#2802) --- .buildkite/test-pipeline.yaml | 3 +++ tests/metrics/test_metrics.py | 34 +++++++++++++++++++++++++++++++++- vllm/engine/llm_engine.py | 3 +++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a91dcdfaf2ea5..efcc4d2d07a12 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -52,6 +52,9 @@ steps: - label: LoRA Test command: pytest -v -s lora +- label: Metrics Test + command: pytest -v -s metrics + - label: Benchmarks working_dir: "/vllm-workspace/.buildkite" commands: diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py index da608a6a18f92..fe09aa8237f24 100644 --- a/tests/metrics/test_metrics.py +++ b/tests/metrics/test_metrics.py @@ -9,13 +9,16 @@ @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["float"]) @pytest.mark.parametrize("max_tokens", [128]) -def test_metrics( +def test_metric_counter_prompt_tokens( vllm_runner, example_prompts, model: str, dtype: str, max_tokens: int, ) -> None: + # Reset metric + vllm.engine.metrics.counter_prompt_tokens.set_value({}, 0) + vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False) tokenizer = vllm_model.model.get_tokenizer() prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts] @@ -31,3 +34,32 @@ def test_metrics( assert vllm_prompt_token_count == metric_count, ( f"prompt token count: {vllm_prompt_token_count!r}\nmetric: {metric_count!r}" ) + + +@pytest.mark.parametrize("model", MODELS) +@pytest.mark.parametrize("dtype", ["float"]) +@pytest.mark.parametrize("max_tokens", [128]) +def test_metric_counter_generation_tokens( + vllm_runner, + example_prompts, + model: str, + dtype: str, + max_tokens: int, +) -> None: + # Reset metric + vllm.engine.metrics.counter_generation_tokens.set_value({}, 0) + + vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False) + vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) + tokenizer = vllm_model.model.get_tokenizer() + metric_count = vllm.engine.metrics.counter_generation_tokens.get_value({}) + vllm_generation_count = 0 + for i in range(len(example_prompts)): + vllm_output_ids, vllm_output_str = vllm_outputs[i] + prompt_ids = tokenizer.encode(example_prompts[i]) + # vllm_output_ids contains both prompt tokens and generation tokens. We're interested only in the count of the generation tokens. 
+ vllm_generation_count += len(vllm_output_ids) - len(prompt_ids) + + assert vllm_generation_count == metric_count, ( + f"generation token count: {vllm_generation_count!r}\nmetric: {metric_count!r}" + ) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f0de40f54db61..81c9281c55416 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -872,6 +872,9 @@ def _get_stats(self, num_prompt_tokens = sum( len(seq_group.prompt_token_ids) for seq_group in scheduler_outputs.scheduled_seq_groups) + num_generation_tokens = sum( + seq_group.num_seqs() + for seq_group in scheduler_outputs.scheduled_seq_groups) else: num_generation_tokens = scheduler_outputs.num_batched_tokens From 57f044945f25d90d1b434014b2719ba6b06fdc44 Mon Sep 17 00:00:00 2001 From: zhaoyang-star Date: Fri, 23 Feb 2024 06:25:07 +0800 Subject: [PATCH 010/196] Fix nvcc not found in vlm-openai image (#2781) --- vllm/config.py | 2 +- vllm/utils.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 0b8a2a27f6d43..bd0dc89b585f7 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -319,7 +319,7 @@ def _verify_cache_dtype(self) -> None: pass elif self.cache_dtype == "fp8_e5m2": nvcc_cuda_version = get_nvcc_cuda_version() - if nvcc_cuda_version < Version("11.8"): + if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"): raise ValueError( "FP8 is not supported when cuda version is lower than 11.8." ) diff --git a/vllm/utils.py b/vllm/utils.py index 6206879929061..8ca95e148eb39 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -181,13 +181,18 @@ def set_cuda_visible_devices(device_ids: List[int]) -> None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, device_ids)) -def get_nvcc_cuda_version() -> Version: +def get_nvcc_cuda_version() -> Optional[Version]: cuda_home = os.environ.get('CUDA_HOME') if not cuda_home: cuda_home = '/usr/local/cuda' - logger.info( - f'CUDA_HOME is not found in the environment. Using {cuda_home} as CUDA_HOME.' - ) + if os.path.isfile(cuda_home + '/bin/nvcc'): + logger.info( + f'CUDA_HOME is not found in the environment. Using {cuda_home} as CUDA_HOME.' + ) + else: + logger.warning( + f'Not found nvcc in {cuda_home}. 
Skip cuda version check!') + return None nvcc_output = subprocess.check_output([cuda_home + "/bin/nvcc", "-V"], universal_newlines=True) output = nvcc_output.split() From f7c1234990793008f3d44790fd274040f26c4ee4 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 23 Feb 2024 12:57:48 -0800 Subject: [PATCH 011/196] [Fix] Fissertion on YaRN model len (#2984) --- vllm/model_executor/layers/rotary_embedding.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py index 87068644112c0..13749570f28a2 100644 --- a/vllm/model_executor/layers/rotary_embedding.py +++ b/vllm/model_executor/layers/rotary_embedding.py @@ -354,7 +354,6 @@ def get_rope( elif scaling_type == "yarn": original_max_position = rope_scaling[ "original_max_position_embeddings"] - assert max_position == original_max_position * scaling_factor extra_kwargs = { k: v for k, v in rope_scaling.items() From ef978fe4111b0eb91c81eceba4d9791b94c7ffbf Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Sun, 25 Feb 2024 19:54:00 +0000 Subject: [PATCH 012/196] Port metrics from `aioprometheus` to `prometheus_client` (#2730) --- docs/source/conf.py | 2 +- requirements-neuron.txt | 2 +- requirements-rocm.txt | 2 +- requirements.txt | 2 +- tests/conftest.py | 2 + tests/metrics/test_metrics.py | 25 ++-- vllm/engine/llm_engine.py | 3 +- vllm/engine/metrics.py | 170 ++++++++++++++++---------- vllm/entrypoints/openai/api_server.py | 12 +- 9 files changed, 133 insertions(+), 87 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index adbe67b21a0c8..5a45c6f9d1e0a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -72,7 +72,7 @@ # Mock out external dependencies here. autodoc_mock_imports = [ - "torch", "transformers", "psutil", "aioprometheus", "sentencepiece", + "torch", "transformers", "psutil", "prometheus_client", "sentencepiece", "vllm.cuda_utils", "vllm._C" ] diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 3f30ed08f037d..36e629add664d 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -6,4 +6,4 @@ neuronx-cc fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -aioprometheus[starlette] +prometheus_client diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 42b89ae84aa45..e759ba7d028d9 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -10,4 +10,4 @@ transformers >= 4.38.0 # Required for Gemma. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -aioprometheus[starlette] +prometheus_client diff --git a/requirements.txt b/requirements.txt index de08bd29beaf9..de93ba6354cda 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ xformers == 0.0.23.post1 # Required for CUDA 12.1. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -aioprometheus[starlette] +prometheus_client pynvml == 11.5.0 triton >= 2.1.0 cupy-cuda12x == 12.1.0 # Required for CUDA graphs. CUDA 11.8 users should install cupy-cuda11x instead. 
diff --git a/tests/conftest.py b/tests/conftest.py index 6af9b36b6febe..30a3df89d9f12 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -165,6 +165,7 @@ def __init__( dtype: str = "half", disable_log_stats: bool = True, tensor_parallel_size: int = 1, + **kwargs, ) -> None: self.model = LLM( model=model_name, @@ -174,6 +175,7 @@ def __init__( swap_space=0, disable_log_stats=disable_log_stats, tensor_parallel_size=tensor_parallel_size, + **kwargs, ) def generate( diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py index fe09aa8237f24..410bdfa5c69e2 100644 --- a/tests/metrics/test_metrics.py +++ b/tests/metrics/test_metrics.py @@ -1,5 +1,4 @@ import pytest -import vllm.engine.metrics MODELS = [ "facebook/opt-125m", @@ -16,10 +15,10 @@ def test_metric_counter_prompt_tokens( dtype: str, max_tokens: int, ) -> None: - # Reset metric - vllm.engine.metrics.counter_prompt_tokens.set_value({}, 0) - - vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False) + vllm_model = vllm_runner(model, + dtype=dtype, + disable_log_stats=False, + gpu_memory_utilization=0.4) tokenizer = vllm_model.model.get_tokenizer() prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts] # This test needs at least 2 prompts in a batch of different lengths to verify their token count is correct despite padding. @@ -29,7 +28,9 @@ def test_metric_counter_prompt_tokens( vllm_prompt_token_count = sum(prompt_token_counts) _ = vllm_model.generate_greedy(example_prompts, max_tokens) - metric_count = vllm.engine.metrics.counter_prompt_tokens.get_value({}) + stat_logger = vllm_model.model.llm_engine.stat_logger + metric_count = stat_logger.metrics.counter_prompt_tokens.labels( + **stat_logger.labels)._value.get() assert vllm_prompt_token_count == metric_count, ( f"prompt token count: {vllm_prompt_token_count!r}\nmetric: {metric_count!r}" @@ -46,13 +47,15 @@ def test_metric_counter_generation_tokens( dtype: str, max_tokens: int, ) -> None: - # Reset metric - vllm.engine.metrics.counter_generation_tokens.set_value({}, 0) - - vllm_model = vllm_runner(model, dtype=dtype, disable_log_stats=False) + vllm_model = vllm_runner(model, + dtype=dtype, + disable_log_stats=False, + gpu_memory_utilization=0.4) vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) tokenizer = vllm_model.model.get_tokenizer() - metric_count = vllm.engine.metrics.counter_generation_tokens.get_value({}) + stat_logger = vllm_model.model.llm_engine.stat_logger + metric_count = stat_logger.metrics.counter_generation_tokens.labels( + **stat_logger.labels)._value.get() vllm_generation_count = 0 for i in range(len(example_prompts)): vllm_output_ids, vllm_output_str = vllm_outputs[i] diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 81c9281c55416..c1a75924c6d72 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -128,7 +128,8 @@ def __init__( # Metric Logging. 
if self.log_stats: self.stat_logger = StatLogger( - local_interval=_LOCAL_LOGGING_INTERVAL_SEC) + local_interval=_LOCAL_LOGGING_INTERVAL_SEC, + labels=dict(model_name=model_config.model)) self.forward_dag = None if USE_RAY_COMPILED_DAG: diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index e613b9f551b2f..83e66a9372272 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -1,66 +1,94 @@ from vllm.logger import init_logger -from aioprometheus import Counter, Gauge, Histogram +from prometheus_client import Counter, Gauge, Histogram, REGISTRY, disable_created_metrics import time import numpy as np -from typing import List +from typing import Dict, List from dataclasses import dataclass logger = init_logger(__name__) -labels = {} - - -def add_global_metrics_labels(**kwargs): - labels.update(kwargs) - +disable_created_metrics() # The begin-* and end* here are used by the documentation generator # to extract the metrics definitions. + # begin-metrics-definitions -gauge_avg_prompt_throughput = Gauge("vllm:avg_prompt_throughput_toks_per_s", - "Average prefill throughput in tokens/s.") -gauge_avg_generation_throughput = Gauge( - "vllm:avg_generation_throughput_toks_per_s", - "Average generation throughput in tokens/s.") -counter_prompt_tokens = Counter("vllm:prompt_tokens_total", - "Number of prefill tokens processed.") -counter_generation_tokens = Counter("vllm:generation_tokens_total", - "Number of generation tokens processed.") - -gauge_scheduler_running = Gauge( - "vllm:num_requests_running", - "Number of requests currently running on GPU.") -gauge_scheduler_swapped = Gauge("vllm:num_requests_swapped", - "Number of requests swapped to CPU.") -gauge_scheduler_waiting = Gauge("vllm:num_requests_waiting", - "Number of requests waiting to be processed.") - -gauge_gpu_cache_usage = Gauge( - "vllm:gpu_cache_usage_perc", - "GPU KV-cache usage. 1 means 100 percent usage.") -gauge_cpu_cache_usage = Gauge( - "vllm:cpu_cache_usage_perc", - "CPU KV-cache usage. 
1 means 100 percent usage.") - -histogram_time_to_first_token = Histogram( - "vllm:time_to_first_token_seconds", - "Histogram of time to first token in seconds.", - buckets=[ - 0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5, 0.75, 1.0, - 2.5, 5.0, 7.5, 10.0 - ]) -histogram_time_per_output_tokens = Histogram( - "vllm:time_per_output_token_seconds", - "Histogram of time per output token in seconds.", - buckets=[ - 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5 - ]) -histogram_e2e_request_latency = Histogram( - "vllm:e2e_request_latency_seconds", - "Histogram of end to end request latency in seconds.", - buckets=[1.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0, 60.0]) +class Metrics: + + def __init__(self, labelnames: List[str]): + # Unregister any existing vLLM collectors + for collector in list(REGISTRY._collector_to_names): + if hasattr(collector, "_name") and "vllm" in collector._name: + REGISTRY.unregister(collector) + + # System stats + self.gauge_scheduler_running = Gauge( + name="vllm:num_requests_running", + documentation="Number of requests currently running on GPU.", + labelnames=labelnames) + self.gauge_scheduler_swapped = Gauge( + name="vllm:num_requests_swapped", + documentation="Number of requests swapped to CPU.", + labelnames=labelnames) + self.gauge_scheduler_waiting = Gauge( + name="vllm:num_requests_waiting", + documentation="Number of requests waiting to be processed.", + labelnames=labelnames) + self.gauge_gpu_cache_usage = Gauge( + name="vllm:gpu_cache_usage_perc", + documentation="GPU KV-cache usage. 1 means 100 percent usage.", + labelnames=labelnames) + self.gauge_cpu_cache_usage = Gauge( + name="vllm:cpu_cache_usage_perc", + documentation="CPU KV-cache usage. 1 means 100 percent usage.", + labelnames=labelnames) + + # Raw stats from last model iteration + self.counter_prompt_tokens = Counter( + name="vllm:prompt_tokens_total", + documentation="Number of prefill tokens processed.", + labelnames=labelnames) + self.counter_generation_tokens = Counter( + name="vllm:generation_tokens_total", + documentation="Number of generation tokens processed.", + labelnames=labelnames) + self.histogram_time_to_first_token = Histogram( + name="vllm:time_to_first_token_seconds", + documentation="Histogram of time to first token in seconds.", + labelnames=labelnames, + buckets=[ + 0.001, 0.005, 0.01, 0.02, 0.04, 0.06, 0.08, 0.1, 0.25, 0.5, + 0.75, 1.0, 2.5, 5.0, 7.5, 10.0 + ]) + self.histogram_time_per_output_token = Histogram( + name="vllm:time_per_output_token_seconds", + documentation="Histogram of time per output token in seconds.", + labelnames=labelnames, + buckets=[ + 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, + 1.0, 2.5 + ]) + self.histogram_e2e_request_latency = Histogram( + name="vllm:e2e_request_latency_seconds", + documentation="Histogram of end to end request latency in seconds.", + labelnames=labelnames, + buckets=[1.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0, 60.0]) + + # Legacy metrics + self.gauge_avg_prompt_throughput = Gauge( + name="vllm:avg_prompt_throughput_toks_per_s", + documentation="Average prefill throughput in tokens/s.", + labelnames=labelnames, + ) + self.gauge_avg_generation_throughput = Gauge( + name="vllm:avg_generation_throughput_toks_per_s", + documentation="Average generation throughput in tokens/s.", + labelnames=labelnames, + ) + + # end-metrics-definitions @@ -87,7 +115,7 @@ class Stats: class StatLogger: """StatLogger is used LLMEngine to log to Promethus and Stdout.""" - def 
__init__(self, local_interval: float) -> None: + def __init__(self, local_interval: float, labels: Dict[str, str]) -> None: # Metadata for logging locally. self.last_local_log = time.monotonic() self.local_interval = local_interval @@ -96,6 +124,10 @@ def __init__(self, local_interval: float) -> None: self.num_prompt_tokens: List[int] = [] self.num_generation_tokens: List[int] = [] + # Prometheus metrics + self.labels = labels + self.metrics = Metrics(labelnames=list(labels.keys())) + def _get_throughput(self, tracked_stats: List[int], now: float) -> float: return float(np.sum(tracked_stats) / (now - self.last_local_log)) @@ -105,23 +137,33 @@ def _local_interval_elapsed(self, now: float) -> bool: def _log_prometheus(self, stats: Stats) -> None: # Set system stat gauges. - gauge_scheduler_running.set(labels, stats.num_running) - gauge_scheduler_swapped.set(labels, stats.num_swapped) - gauge_scheduler_waiting.set(labels, stats.num_waiting) - gauge_gpu_cache_usage.set(labels, stats.gpu_cache_usage) - gauge_cpu_cache_usage.set(labels, stats.cpu_cache_usage) + self.metrics.gauge_scheduler_running.labels(**self.labels).set( + stats.num_running) + self.metrics.gauge_scheduler_swapped.labels(**self.labels).set( + stats.num_swapped) + self.metrics.gauge_scheduler_waiting.labels(**self.labels).set( + stats.num_waiting) + self.metrics.gauge_gpu_cache_usage.labels(**self.labels).set( + stats.gpu_cache_usage) + self.metrics.gauge_cpu_cache_usage.labels(**self.labels).set( + stats.cpu_cache_usage) # Add to token counters. - counter_prompt_tokens.add(labels, stats.num_prompt_tokens) - counter_generation_tokens.add(labels, stats.num_generation_tokens) + self.metrics.counter_prompt_tokens.labels(**self.labels).inc( + stats.num_prompt_tokens) + self.metrics.counter_generation_tokens.labels(**self.labels).inc( + stats.num_generation_tokens) # Observe request level latencies in histograms. for ttft in stats.time_to_first_tokens: - histogram_time_to_first_token.observe(labels, ttft) + self.metrics.histogram_time_to_first_token.labels( + **self.labels).observe(ttft) for tpot in stats.time_per_output_tokens: - histogram_time_per_output_tokens.observe(labels, tpot) + self.metrics.histogram_time_per_output_token.labels( + **self.labels).observe(tpot) for e2e in stats.time_e2e_requests: - histogram_e2e_request_latency.observe(labels, e2e) + self.metrics.histogram_e2e_request_latency.labels( + **self.labels).observe(e2e) def _log_prometheus_interval(self, prompt_throughput: float, generation_throughput: float) -> None: @@ -130,8 +172,10 @@ def _log_prometheus_interval(self, prompt_throughput: float, # Moving forward, we should use counters like counter_prompt_tokens, counter_generation_tokens # Which log raw data and calculate summaries using rate() on the grafana/prometheus side. # See https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666 - gauge_avg_prompt_throughput.set(labels, prompt_throughput) - gauge_avg_generation_throughput.set(labels, generation_throughput) + self.metrics.gauge_avg_prompt_throughput.labels( + **self.labels).set(prompt_throughput) + self.metrics.gauge_avg_generation_throughput.labels( + **self.labels).set(generation_throughput) def log(self, stats: Stats) -> None: """Called by LLMEngine. 
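
The pattern adopted above, where metrics are constructed once with labelnames and then updated through label-bound children, is standard prometheus_client usage. A minimal sketch, independent of vLLM; the metric and label names below are illustrative rather than the "vllm:*" metrics the engine registers, and reading _value.get() relies on the same private attribute the updated tests use:

from prometheus_client import Counter, Gauge

# Illustrative names only; vLLM registers its own "vllm:*" metrics.
labels = {"model_name": "facebook/opt-125m"}

counter_tokens = Counter(name="demo_tokens_total",
                         documentation="Tokens processed.",
                         labelnames=list(labels.keys()))
gauge_running = Gauge(name="demo_requests_running",
                      documentation="Requests currently running.",
                      labelnames=list(labels.keys()))

# Updates go through label-bound children instead of per-call label dicts.
counter_tokens.labels(**labels).inc(128)
gauge_running.labels(**labels).set(3)

# A test can read a child's current value back (private API, as in test_metrics.py).
assert counter_tokens.labels(**labels)._value.get() == 128
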
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index a217605452e3a..b2f040114a078 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -6,8 +6,7 @@ import importlib import inspect -from aioprometheus import MetricsMiddleware -from aioprometheus.asgi.starlette import metrics +from prometheus_client import make_asgi_app import fastapi import uvicorn from http import HTTPStatus @@ -18,7 +17,6 @@ from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.engine.metrics import add_global_metrics_labels from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest, ErrorResponse from vllm.logger import init_logger from vllm.entrypoints.openai.serving_chat import OpenAIServingChat @@ -141,8 +139,9 @@ def parse_args(): return parser.parse_args() -app.add_middleware(MetricsMiddleware) # Trace HTTP server metrics -app.add_route("/metrics", metrics) # Exposes HTTP metrics +# Add prometheus asgi middleware to route /metrics requests +metrics_app = make_asgi_app() +app.mount("/metrics", metrics_app) @app.exception_handler(RequestValidationError) @@ -242,9 +241,6 @@ async def authentication(request: Request, call_next): openai_serving_completion = OpenAIServingCompletion( engine, served_model, args.lora_modules) - # Register labels for metrics - add_global_metrics_labels(model_name=engine_args.model) - app.root_path = args.root_path uvicorn.run(app, host=args.host, From 70f3e8e3a1ed081003c0a2b70de151bb144f98e0 Mon Sep 17 00:00:00 2001 From: Jared Moore <27744679+jlcmoore@users.noreply.github.com> Date: Sun, 25 Feb 2024 18:39:34 -0800 Subject: [PATCH 013/196] Add LogProbs for Chat Completions in OpenAI (#2918) --- tests/entrypoints/test_openai_server.py | 25 ++++++++-------- vllm/entrypoints/openai/protocol.py | 8 ++++++ vllm/entrypoints/openai/serving_chat.py | 38 +++++++++++++++++++++++-- 3 files changed, 57 insertions(+), 14 deletions(-) diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index 3a359502c39d5..29d0e6fd537d5 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -155,15 +155,18 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI, }] # test single completion - chat_completion = await client.chat.completions.create( - model=model_name, - messages=messages, - max_tokens=10, - ) + chat_completion = await client.chat.completions.create(model=model_name, + messages=messages, + max_tokens=10, + logprobs=True, + top_logprobs=10) assert chat_completion.id is not None assert chat_completion.choices is not None and len( chat_completion.choices) == 1 assert chat_completion.choices[0].message is not None + assert chat_completion.choices[0].logprobs is not None + assert chat_completion.choices[0].logprobs.top_logprobs is not None + assert len(chat_completion.choices[0].logprobs.top_logprobs[0]) == 10 message = chat_completion.choices[0].message assert message.content is not None and len(message.content) >= 10 assert message.role == "assistant" @@ -198,13 +201,11 @@ async def test_completion_streaming(server, client: openai.AsyncOpenAI, single_output = single_completion.choices[0].text single_usage = single_completion.usage - stream = await client.completions.create( - model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True, - ) + stream = await client.completions.create(model=model_name, + 
prompt=prompt, + max_tokens=5, + temperature=0.0, + stream=True) chunks = [] async for chunk in stream: chunks.append(chunk.choices[0].text) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 7c2aa707775ff..f57a2fb775783 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -63,6 +63,8 @@ class ChatCompletionRequest(BaseModel): seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = Field(default_factory=list) stream: Optional[bool] = False + logprobs: Optional[bool] = False + top_logprobs: Optional[int] = None presence_penalty: Optional[float] = 0.0 frequency_penalty: Optional[float] = 0.0 logit_bias: Optional[Dict[str, float]] = None @@ -84,6 +86,8 @@ class ChatCompletionRequest(BaseModel): length_penalty: Optional[float] = 1.0 def to_sampling_params(self) -> SamplingParams: + if self.logprobs and not self.top_logprobs: + raise ValueError("Top logprobs must be set when logprobs is.") return SamplingParams( n=self.n, presence_penalty=self.presence_penalty, @@ -96,6 +100,8 @@ def to_sampling_params(self) -> SamplingParams: stop=self.stop, stop_token_ids=self.stop_token_ids, max_tokens=self.max_tokens, + logprobs=self.top_logprobs if self.logprobs else None, + prompt_logprobs=self.top_logprobs if self.echo else None, best_of=self.best_of, top_k=self.top_k, ignore_eos=self.ignore_eos, @@ -216,6 +222,7 @@ class ChatMessage(BaseModel): class ChatCompletionResponseChoice(BaseModel): index: int message: ChatMessage + logprobs: Optional[LogProbs] = None finish_reason: Optional[Literal["stop", "length"]] = None @@ -236,6 +243,7 @@ class DeltaMessage(BaseModel): class ChatCompletionResponseStreamChoice(BaseModel): index: int delta: DeltaMessage + logprobs: Optional[LogProbs] = None finish_reason: Optional[Literal["stop", "length"]] = None diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 850797ae4b9b6..dd152583c2329 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -101,7 +101,10 @@ async def chat_completion_stream_generator( role = self.get_chat_request_role(request) for i in range(request.n): choice_data = ChatCompletionResponseStreamChoice( - index=i, delta=DeltaMessage(role=role), finish_reason=None) + index=i, + delta=DeltaMessage(role=role), + logprobs=None, + finish_reason=None) chunk = ChatCompletionStreamResponse(id=request_id, object=chunk_object_type, created=created_time, @@ -118,6 +121,7 @@ async def chat_completion_stream_generator( "content") and request.messages[-1].get( "role") == role: last_msg_content = request.messages[-1]["content"] + if last_msg_content: for i in range(request.n): choice_data = ChatCompletionResponseStreamChoice( @@ -129,6 +133,7 @@ async def chat_completion_stream_generator( object=chunk_object_type, created=created_time, choices=[choice_data], + logprobs=None, model=model_name) data = chunk.model_dump_json(exclude_unset=True) yield f"data: {data}\n\n" @@ -145,15 +150,29 @@ async def chat_completion_stream_generator( if finish_reason_sent[i]: continue + delta_token_ids = output.token_ids[previous_num_tokens[i]:] + top_logprobs = output.logprobs[ + previous_num_tokens[i]:] if output.logprobs else None + + if request.logprobs: + logprobs = self._create_logprobs( + token_ids=delta_token_ids, + top_logprobs=top_logprobs, + num_output_top_logprobs=request.logprobs, + initial_text_offset=len(previous_texts[i]), + ) + else: + logprobs = None + delta_text = 
output.text[len(previous_texts[i]):] previous_texts[i] = output.text previous_num_tokens[i] = len(output.token_ids) - if output.finish_reason is None: # Send token-by-token response for each request.n choice_data = ChatCompletionResponseStreamChoice( index=i, delta=DeltaMessage(content=delta_text), + logprobs=logprobs, finish_reason=None) chunk = ChatCompletionStreamResponse( id=request_id, @@ -174,6 +193,7 @@ async def chat_completion_stream_generator( choice_data = ChatCompletionResponseStreamChoice( index=i, delta=DeltaMessage(content=delta_text), + logprobs=logprobs, finish_reason=output.finish_reason) chunk = ChatCompletionStreamResponse( id=request_id, @@ -208,11 +228,25 @@ async def chat_completion_full_generator( assert final_res is not None choices = [] + role = self.get_chat_request_role(request) for output in final_res.outputs: + token_ids = output.token_ids + top_logprobs = output.logprobs + + if request.logprobs: + logprobs = self._create_logprobs( + token_ids=token_ids, + top_logprobs=top_logprobs, + num_output_top_logprobs=request.logprobs, + ) + else: + logprobs = None + choice_data = ChatCompletionResponseChoice( index=output.index, message=ChatMessage(role=role, content=output.text), + logprobs=logprobs, finish_reason=output.finish_reason, ) choices.append(choice_data) From cfc15a1031ef0197a1b291d2ed93717a9bdad268 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 26 Feb 2024 13:48:56 -0800 Subject: [PATCH 014/196] Optimize Triton MoE Kernel (#2979) Co-authored-by: Cade Daniel --- benchmarks/kernels/benchmark_mixtral_moe.py | 172 ++++++++++++++++++ setup.py | 4 +- .../layers/fused_moe/__init__.py | 5 + ...584,device_name=NVIDIA_A100-SXM4-80GB.json | 20 ++ ...168,device_name=NVIDIA_H100_80GB_HBM3.json | 24 +++ .../layers/fused_moe/configs/README | 10 + .../layers/{ => fused_moe}/fused_moe.py | 77 ++++++-- 7 files changed, 297 insertions(+), 15 deletions(-) create mode 100644 benchmarks/kernels/benchmark_mixtral_moe.py create mode 100644 vllm/model_executor/layers/fused_moe/__init__.py create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/README rename vllm/model_executor/layers/{ => fused_moe}/fused_moe.py (85%) diff --git a/benchmarks/kernels/benchmark_mixtral_moe.py b/benchmarks/kernels/benchmark_mixtral_moe.py new file mode 100644 index 0000000000000..9e08df76947f8 --- /dev/null +++ b/benchmarks/kernels/benchmark_mixtral_moe.py @@ -0,0 +1,172 @@ +import json +import os +import sys + +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + +from vllm.model_executor.layers.fused_moe import fused_moe +import torch +import torch.nn.functional as F +import triton + + +def main(): + method = fused_moe + for bs in [ + 1, 2, 4, 8, 16, 24, 32, 48, 64, 96, 128, 256, 512, 1024, 1536, + 2048, 3072, 4096 + ]: + run_grid(bs, method=method) + + +def run_grid(bs, method): + d_model = 4096 + num_total_experts = 8 + top_k = 2 + tp_size = 2 + model_intermediate_size = 14336 + num_layers = 32 + num_calls = 100 + + num_warmup_trials = 1 + num_trials = 1 + + configs = [] + if bs <= 16: + BLOCK_SIZES_M = [16] + elif bs <= 32: + BLOCK_SIZES_M = [16, 32] + elif bs <= 64: + BLOCK_SIZES_M = [16, 32, 64] + elif bs <= 128: + BLOCK_SIZES_M = [16, 32, 64, 128] + else: + BLOCK_SIZES_M = [16, 32, 64, 128, 256] + + for block_size_n in [32, 64, 128, 256]: + for block_size_m in 
BLOCK_SIZES_M: + for block_size_k in [64, 128, 256]: + for group_size_m in [1, 16, 32, 64]: + for num_warps in [4, 8]: + configs.append({ + "BLOCK_SIZE_M": block_size_m, + "BLOCK_SIZE_N": block_size_n, + "BLOCK_SIZE_K": block_size_k, + "GROUP_SIZE_M": group_size_m, + "num_warps": num_warps, + "num_stages": 4, + }) + + best_config = None + best_time_us = 1e20 + + for config in configs: + print(f'{tp_size=} {bs=}') + print(f'{config}') + # warmup + print(f'warming up') + try: + for _ in range(num_warmup_trials): + run_timing( + num_calls=num_calls, + bs=bs, + d_model=d_model, + num_total_experts=num_total_experts, + top_k=top_k, + tp_size=tp_size, + model_intermediate_size=model_intermediate_size, + method=method, + config=config, + ) + except triton.runtime.autotuner.OutOfResources: + continue + + # trial + print(f'benchmarking') + for _ in range(num_trials): + kernel_dur_ms = run_timing( + num_calls=num_calls, + bs=bs, + d_model=d_model, + num_total_experts=num_total_experts, + top_k=top_k, + tp_size=tp_size, + model_intermediate_size=model_intermediate_size, + method=method, + config=config, + ) + + kernel_dur_us = 1000 * kernel_dur_ms + model_dur_ms = kernel_dur_ms * num_layers + + if kernel_dur_us < best_time_us: + best_config = config + best_time_us = kernel_dur_us + + print( + f'{kernel_dur_us=:.1f} {model_dur_ms=:.1f} {bs=} {tp_size=} {top_k=} {num_total_experts=} {d_model=} {model_intermediate_size=} {num_layers=}' + ) + + print("best_time_us", best_time_us) + print("best_config", best_config) + + filename = "/tmp/config.jsonl" + print(f"writing config to file {filename}") + with open(filename, "a") as f: + f.write(json.dumps({str(bs): best_config}) + "\n") + + +def run_timing(num_calls: int, bs: int, d_model: int, num_total_experts: int, + top_k: int, tp_size: int, model_intermediate_size: int, method, + config) -> float: + shard_intermediate_size = model_intermediate_size // tp_size + + hidden_states = torch.rand( + (bs, d_model), + device="cuda:0", + dtype=torch.bfloat16, + ) + + ws = torch.rand( + (num_total_experts, 2 * shard_intermediate_size, d_model), + device=hidden_states.device, + dtype=hidden_states.dtype, + ) + + w2s = torch.rand( + (num_total_experts, d_model, shard_intermediate_size), + device=hidden_states.device, + dtype=hidden_states.dtype, + ) + + gating_output = F.softmax(torch.rand( + (num_calls, bs, num_total_experts), + device=hidden_states.device, + dtype=torch.float32, + ), + dim=-1) + + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + start_event.record() + for i in range(num_calls): + hidden_states = method( + hidden_states=hidden_states, + w1=ws, + w2=w2s, + gating_output=gating_output[i], + topk=2, + renormalize=True, + inplace=True, + override_config=config, + ) + end_event.record() + end_event.synchronize() + + dur_ms = start_event.elapsed_time(end_event) / num_calls + return dur_ms + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/setup.py b/setup.py index 8fcb86394f76d..16978d74e0425 100644 --- a/setup.py +++ b/setup.py @@ -432,7 +432,9 @@ def get_requirements() -> List[str]: return requirements -package_data = {"vllm": ["py.typed"]} +package_data = { + "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"] +} if os.environ.get("VLLM_USE_PRECOMPILED"): ext_modules = [] package_data["vllm"].append("*.so") diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py new file mode 100644 index 
0000000000000..1391d43c8abeb --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/__init__.py @@ -0,0 +1,5 @@ +from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe + +__all__ = [ + "fused_moe", +] diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json new file mode 100644 index 0000000000000..1fefb5ff7e42d --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json @@ -0,0 +1,20 @@ +{ + "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "2": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, + "4": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, + "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, + "16": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, + "24": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "32": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "64": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "96": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, + "128": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, + "192": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, + "256": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4}, + "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 32, "num_warps": 8, "num_stages": 4}, + "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "1536": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4}, + "2048": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, + "3072": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 8, "num_stages": 4}, + "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4} +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json new file mode 100644 index 0000000000000..64d49ca66c1c8 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json @@ -0,0 +1,24 @@ +{ + "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, + "2": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "4": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, 
"BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 8, "num_stages": 4}, + "16": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, + "24": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, + "32": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "80": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "96": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "128": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "192": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, + "200": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, + "208": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, + "216": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, + "224": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 32, "num_warps": 4, "num_stages": 4}, + "256": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 32, "num_warps": 4, "num_stages": 4}, + "512": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "1536": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "2048": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "3072": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, + "4096": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4} +} diff --git a/vllm/model_executor/layers/fused_moe/configs/README b/vllm/model_executor/layers/fused_moe/configs/README new file mode 100644 index 0000000000000..45d40cbfb1a2e --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/README @@ -0,0 +1,10 @@ +This directory contains tuned configurations for different settings of the fused_moe kernel. +For different settings of +- E (number of experts) +- N (intermediate size) +- device_name (torch.cuda.get_device_name()) +the JSON file contains a mapping from M (batch size) to the chosen configuration. + +The example configurations provided are for the Mixtral model for TP2 on H100 +and TP4 on A100. Mixtral has intermediate size N = 14336, i.e. for TP2 we have +N = 7168 and for TP4 we have N = 3584. 
diff --git a/vllm/model_executor/layers/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py similarity index 85% rename from vllm/model_executor/layers/fused_moe.py rename to vllm/model_executor/layers/fused_moe/fused_moe.py index bc3aef1887ef8..830fde6c4eb6d 100644 --- a/vllm/model_executor/layers/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1,11 +1,19 @@ """Fused MoE kernel.""" +import functools +import json +import os +from typing import Any, Dict, Optional + import torch import triton import triton.language as tl from vllm._C import ops +from vllm.logger import init_logger from vllm.utils import is_hip +logger = init_logger(__name__) + @triton.jit def fused_moe_kernel( @@ -210,6 +218,34 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, ) +@functools.lru_cache +def get_moe_configs(E: int, N: int) -> Optional[Dict[int, Any]]: + """ + Return optimized configurations for the fused MoE kernel. + + The return value will be a dictionary that maps an irregular grid of batch sizes + to configurations of the fused_moe kernel. To evaluate the kernel on a given batch + size bs, the closest batch size in the grid should be picked and the associated + configuration chosen to invoke the kernel. + """ + + # First look up if an optimized configuration is available in the configs directory + device_name = torch.cuda.get_device_name().replace(" ", "_") + + config_file_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "configs", + f"E={E},N={N},device_name={device_name}.json") + if os.path.exists(config_file_path): + with open(config_file_path) as f: + logger.info( + f"Using configuration from {config_file_path} for MoE layer.") + # If a configuration has been found, return it + return {int(key): val for key, val in json.load(f).items()} + + # If no optimized configuration is available, we will use the default configuration + return None + + def fused_moe( hidden_states: torch.Tensor, w1: torch.Tensor, @@ -218,6 +254,7 @@ def fused_moe( topk: int, renormalize: bool, inplace: bool = False, + override_config: Optional[Dict[str, Any]] = None, ) -> torch.Tensor: """ This function computes a Mixture of Experts (MoE) layer using two sets of weights, w1 and w2, and top-k gating mechanism. @@ -230,6 +267,7 @@ def fused_moe( - topk (int): The number of top-k experts to select. - renormalize (bool): If True, renormalize the top-k weights to sum to 1. - inplace (bool): If True, perform the operation in-place. Defaults to False. + - override_config (Optional[Dict[str, Any]]): Optional override for the kernel configuration. Returns: - torch.Tensor: The output tensor after applying the MoE layer. 
@@ -279,20 +317,31 @@ def fused_moe( if renormalize: topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True) - config = { - 'BLOCK_SIZE_M': 64, - 'BLOCK_SIZE_N': 64, - 'BLOCK_SIZE_K': 32, - 'GROUP_SIZE_M': 8 - } - - if topk_ids.numel() <= w1.shape[0]: - config = { - 'BLOCK_SIZE_M': 16, - 'BLOCK_SIZE_N': 32, - 'BLOCK_SIZE_K': 64, - 'GROUP_SIZE_M': 1 - } + if override_config: + config = override_config + else: + # First try to load optimal config from the file + configs = get_moe_configs(E, w2.shape[2]) + + if configs: + # If an optimal configuration map has been found, look up the optimal config + config = configs[min(configs.keys(), key=lambda x: abs(x - M))] + else: + # Else use the default config + config = { + 'BLOCK_SIZE_M': 64, + 'BLOCK_SIZE_N': 64, + 'BLOCK_SIZE_K': 32, + 'GROUP_SIZE_M': 8 + } + + if M <= E: + config = { + 'BLOCK_SIZE_M': 16, + 'BLOCK_SIZE_N': 32, + 'BLOCK_SIZE_K': 64, + 'GROUP_SIZE_M': 1 + } intermediate_cache1 = torch.empty((M, topk_ids.shape[1], N), device=hidden_states.device, From d6e4a130b028f42a7f413d99eb91a4395fa7a04a Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 26 Feb 2024 15:00:54 -0800 Subject: [PATCH 015/196] [Minor] Remove gather_cached_kv kernel (#3043) --- csrc/cache.h | 7 -- csrc/cache_kernels.cu | 161 ------------------------------------------ csrc/pybind.cpp | 4 -- 3 files changed, 172 deletions(-) diff --git a/csrc/cache.h b/csrc/cache.h index 21c71830f7942..765e231abd26f 100644 --- a/csrc/cache.h +++ b/csrc/cache.h @@ -23,13 +23,6 @@ void reshape_and_cache( torch::Tensor& slot_mapping, const std::string& kv_cache_dtype); -void gather_cached_kv( - torch::Tensor& key, - torch::Tensor& value, - torch::Tensor& key_cache, - torch::Tensor& value_cache, - torch::Tensor& slot_mapping); - // Just for unittest void convert_fp8_e5m2( torch::Tensor& src_cache, diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu index ceb7347d94670..7254010b8e3a9 100644 --- a/csrc/cache_kernels.cu +++ b/csrc/cache_kernels.cu @@ -269,167 +269,6 @@ void reshape_and_cache( namespace vllm { -// Grid: (num_blocks, block_size). 
-template -__global__ void gather_cached_kv_kernel( - scalar_t* __restrict__ key, // [num_tokens, [stride], num_heads, head_size] - scalar_t* __restrict__ value, // [num_tokens, [stride], num_heads, head_size] - const scalar_t* __restrict__ key_cache, // [num_blocks, num_heads, head_size/x, block_size, x] - const scalar_t* __restrict__ value_cache, // [num_blocks, num_heads, head_size, block_size] - const int* __restrict__ slot_mapping, // [num_tokens] - const int key_stride, - const int value_stride, - const int num_heads, - const int head_size, - const int block_size, - const int x) { - const int token_idx = blockIdx.x; - const int slot_idx = slot_mapping[token_idx]; - const int block_idx = slot_idx / block_size; - const int block_offset = slot_idx % block_size; - - const int num_tokens = num_heads * head_size; - for (int i = threadIdx.x; i < num_tokens; i += blockDim.x) { - const int tgt_key_idx = token_idx * key_stride + i; - const int tgt_value_idx = token_idx * value_stride + i; - - const int head_idx = i / head_size; - const int head_offset = i % head_size; - const int x_idx = head_offset / x; // the offset of the [head_size/x] dimension - const int x_offset = head_offset % x; - - const int src_key_idx = block_idx * num_heads * (head_size / x) * block_size * x - + head_idx * (head_size / x) * block_size * x - + x_idx * block_size * x - + block_offset * x - + x_offset; - const int src_value_idx = block_idx * num_heads * head_size * block_size - + head_idx * head_size * block_size - + head_offset * block_size - + block_offset; - - key[tgt_key_idx] = VLLM_LDG(&key_cache[src_key_idx]); - value[tgt_value_idx] = VLLM_LDG(&value_cache[src_value_idx]); - } -} - -template -__global__ void gather_cached_kv_kernel_optimized( - scalar_t *__restrict__ key, // [num_tokens, [stride], num_heads, head_size] - scalar_t *__restrict__ value, // [num_tokens, [stride], num_heads, head_size] - const scalar_t *__restrict__ key_cache, // [num_blocks, num_heads, head_size/x, block_size, x] - const scalar_t *__restrict__ value_cache, // [num_blocks, num_heads, head_size, block_size] - const int *__restrict__ slot_mapping, // [num_tokens] - const int key_stride, - const int value_stride, - const int num_heads, - const int head_size, - const int block_size, - const int x) -{ - const int token_idx = blockIdx.x; - const int slot_idx = slot_mapping[token_idx]; - const int block_idx = slot_idx / block_size; - const int block_offset = slot_idx % block_size; - - const int dim = num_heads * head_size; - assert(dim % 4 == 0); // this is true for known use cases - const int unroll_factor = 4; - const int unrolled_dim = dim / unroll_factor; - - for (int i = threadIdx.x; i < unrolled_dim; i += blockDim.x) - { - int tgt_key_indices[unroll_factor]; - int tgt_value_indices[unroll_factor]; - int src_key_indices[unroll_factor]; - int src_value_indices[unroll_factor]; - scalar_t keys_to_store[unroll_factor]; - scalar_t values_to_store[unroll_factor]; - - #pragma unroll - for (int j = 0; j < unroll_factor; ++j) - { - int index = i + j * unrolled_dim; - - const int tgt_key_idx = token_idx * key_stride + index; - const int tgt_value_idx = token_idx * value_stride + index; - - const int head_idx = index / head_size; - const int head_offset = index % head_size; - const int x_idx = head_offset / x; - const int x_offset = head_offset % x; - - const int src_key_idx = block_idx * num_heads * (head_size / x) * block_size * x - + head_idx * (head_size / x) * block_size * x - + x_idx * block_size * x - + block_offset * x - + x_offset; - 
const int src_value_idx = block_idx * num_heads * head_size * block_size - + head_idx * head_size * block_size - + head_offset * block_size - + block_offset; - - tgt_key_indices[j] = tgt_key_idx; - tgt_value_indices[j] = tgt_value_idx; - src_key_indices[j] = src_key_idx; - src_value_indices[j] = src_value_idx; - - keys_to_store[j] = VLLM_LDG(&key_cache[src_key_idx]); - values_to_store[j] = VLLM_LDG(&value_cache[src_value_idx]); - } - - #pragma unroll - for (int j = 0; j < unroll_factor; ++j) - { - key[tgt_key_indices[j]] = keys_to_store[j]; - value[tgt_value_indices[j]] = values_to_store[j]; - } - } -} - -} // namespace vllm - -void gather_cached_kv( - torch::Tensor& key, // [out] [num_tokens, num_heads, head_size] - torch::Tensor& value, // [out] [num_tokens, num_heads, head_size] - torch::Tensor& key_cache, // [in] [num_blocks, num_heads, head_size/x, block_size, x] - torch::Tensor& value_cache, // [in] [num_blocks, num_heads, head_size, block_size] - torch::Tensor& slot_mapping) // [in] [num_tokens] -{ - int num_tokens = key.size(0); - int num_heads = key.size(1); - int head_size = key.size(2); - int block_size = key_cache.size(3); - int x = key_cache.size(4); - - int key_stride = key.stride(0); - int value_stride = value.stride(0); - - dim3 grid(num_tokens); - dim3 block(std::min(num_heads * head_size, 512)); - const at::cuda::OptionalCUDAGuard device_guard(device_of(key)); - const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - VLLM_DISPATCH_FLOATING_AND_BYTE_TYPES( - key.scalar_type(), - "gather_cached_kv_kernel_optimized", - [&] { - vllm::gather_cached_kv_kernel_optimized<<>>( - key.data_ptr(), - value.data_ptr(), - key_cache.data_ptr(), - value_cache.data_ptr(), - slot_mapping.data_ptr(), - key_stride, - value_stride, - num_heads, - head_size, - block_size, - x); - }); -} - -namespace vllm { - template __global__ void convert_fp8_e5m2_kernel( const Tin* __restrict__ src_cache, diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 24c22020131e8..5d062bb5700bc 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -79,10 +79,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { "reshape_and_cache", &reshape_and_cache, "Reshape the key and value tensors and cache them"); - cache_ops.def( - "gather_cached_kv", - &gather_cached_kv, - "Gather key and value from the cache into contiguous QKV tensors"); cache_ops.def( "convert_fp8_e5m2", &convert_fp8_e5m2, From d9f726c4d0920e705069c005fb3b1042368961ae Mon Sep 17 00:00:00 2001 From: Roy Date: Tue, 27 Feb 2024 09:25:22 +0800 Subject: [PATCH 016/196] [Minor] Remove unused config files (#3039) --- vllm/model_executor/models/baichuan.py | 6 +- vllm/model_executor/models/olmo.py | 4 +- vllm/model_executor/models/qwen.py | 8 +-- vllm/transformers_utils/config.py | 2 - vllm/transformers_utils/configs/__init__.py | 6 -- vllm/transformers_utils/configs/baichuan.py | 62 ------------------ vllm/transformers_utils/configs/olmo.py | 72 --------------------- vllm/transformers_utils/configs/qwen.py | 60 ----------------- 8 files changed, 10 insertions(+), 210 deletions(-) delete mode 100644 vllm/transformers_utils/configs/baichuan.py delete mode 100644 vllm/transformers_utils/configs/olmo.py delete mode 100644 vllm/transformers_utils/configs/qwen.py diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index f08c3c8d257ff..550dec6487f9e 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -23,6 +23,7 @@ import torch from torch import nn +from transformers import 
PretrainedConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul @@ -42,7 +43,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader, hf_model_weights_iterator) from vllm.sequence import SamplerOutput -from vllm.transformers_utils.configs.baichuan import BaiChuanConfig KVCache = Tuple[torch.Tensor, torch.Tensor] @@ -186,7 +186,7 @@ def forward( class BaiChuanDecoderLayer(nn.Module): def __init__(self, - config: BaiChuanConfig, + config: PretrainedConfig, position_embedding: str, linear_method: Optional[LinearMethodBase] = None): super().__init__() @@ -245,7 +245,7 @@ def forward( class BaiChuanModel(nn.Module): def __init__(self, - config: BaiChuanConfig, + config: PretrainedConfig, position_embedding: str, linear_method: Optional[LinearMethodBase] = None): super().__init__() diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 2eb42935e8bfd..9d563039208c8 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -61,7 +61,9 @@ hf_model_weights_iterator, ) from vllm.sequence import SamplerOutput -from vllm.transformers_utils.configs.olmo import OLMoConfig + +# this model must need this dependency +from hf_olmo import OLMoConfig KVCache = Tuple[torch.Tensor, torch.Tensor] diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index fbc7320fb45a4..37af84c7cd53f 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -8,6 +8,7 @@ import torch from torch import nn +from transformers import PretrainedConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul @@ -27,7 +28,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader, hf_model_weights_iterator) from vllm.sequence import SamplerOutput -from vllm.transformers_utils.configs.qwen import QWenConfig KVCache = Tuple[torch.Tensor, torch.Tensor] @@ -127,7 +127,7 @@ class QWenBlock(nn.Module): def __init__( self, - config: QWenConfig, + config: PretrainedConfig, linear_method: Optional[LinearMethodBase] = None, ): super().__init__() @@ -179,7 +179,7 @@ class QWenModel(nn.Module): def __init__( self, - config: QWenConfig, + config: PretrainedConfig, linear_method: Optional[LinearMethodBase] = None, ): super().__init__() @@ -222,7 +222,7 @@ class QWenLMHeadModel(nn.Module): def __init__( self, - config: QWenConfig, + config: PretrainedConfig, linear_method: Optional[LinearMethodBase] = None, ): super().__init__() diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 491cb4d9a427c..6b0413f440a0e 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -5,10 +5,8 @@ from vllm.transformers_utils.configs import * _CONFIG_REGISTRY = { - "baichuan": BaiChuanConfig, "chatglm": ChatGLMConfig, "mpt": MPTConfig, - "qwen": QWenConfig, "RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct) "RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct) } diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index 47bcc2b9594be..ef955f75cedaa 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -1,18 +1,12 @@ -from vllm.transformers_utils.configs.baichuan import BaiChuanConfig from vllm.transformers_utils.configs.chatglm import ChatGLMConfig from 
vllm.transformers_utils.configs.mpt import MPTConfig -from vllm.transformers_utils.configs.olmo import OLMoConfig -from vllm.transformers_utils.configs.qwen import QWenConfig # RWConfig is for the original tiiuae/falcon-40b(-instruct) and # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the # `FalconConfig` class from the official HuggingFace transformers library. from vllm.transformers_utils.configs.falcon import RWConfig __all__ = [ - "BaiChuanConfig", "ChatGLMConfig", "MPTConfig", - "OLMoConfig", - "QWenConfig", "RWConfig", ] diff --git a/vllm/transformers_utils/configs/baichuan.py b/vllm/transformers_utils/configs/baichuan.py deleted file mode 100644 index 869817525c11a..0000000000000 --- a/vllm/transformers_utils/configs/baichuan.py +++ /dev/null @@ -1,62 +0,0 @@ -# coding=utf-8 -# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. -# -# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX -# and OPT implementations in this library. It has been modified from its -# original forms to accommodate minor architectural differences compared -# to GPT-NeoX and OPT used by the Meta AI team that trained the model. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from transformers.configuration_utils import PretrainedConfig - - -class BaiChuanConfig(PretrainedConfig): - model_type = "baichuan" - keys_to_ignore_at_inference = ["past_key_values"] - - def __init__( - self, - vocab_size=64000, - hidden_size=4096, - intermediate_size=11008, - num_hidden_layers=32, - num_attention_heads=32, - hidden_act="silu", - max_position_embeddings=4096, - initializer_range=0.02, - rms_norm_eps=1e-6, - use_cache=True, - pad_token_id=0, - bos_token_id=1, - eos_token_id=2, - tie_word_embeddings=False, - **kwargs, - ): - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.hidden_act = hidden_act - self.initializer_range = initializer_range - self.rms_norm_eps = rms_norm_eps - self.use_cache = use_cache - super().__init__( - pad_token_id=pad_token_id, - bos_token_id=bos_token_id, - eos_token_id=eos_token_id, - tie_word_embeddings=tie_word_embeddings, - **kwargs, - ) diff --git a/vllm/transformers_utils/configs/olmo.py b/vllm/transformers_utils/configs/olmo.py deleted file mode 100644 index a9dfc6ec88ca6..0000000000000 --- a/vllm/transformers_utils/configs/olmo.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding=utf-8 -# adapted from https://github.com/allenai/OLMo/blob/v0.2.4/hf_olmo/configuration_olmo.py -"""OLMo configuration""" -from transformers import PretrainedConfig - - -class OLMoConfig(PretrainedConfig): - model_type = 'olmo' - attribute_map = { - 'num_attention_heads': 'n_heads', - 'hidden_size': 'd_model', - 'num_hidden_layers': 'n_layers', - } - - # Note that the defaults for these attributes are equivalent to the base GPT2 model. 
- def __init__( - self, - d_model=768, - n_heads=12, - n_layers=12, - mlp_ratio=4, - mlp_hidden_size=None, - activation_type="swiglu", - block_type="sequential", - block_group_size=1, - alibi=False, - alibi_bias_max=8.0, - rope=False, - rope_full_precision=True, - multi_query_attention=False, - attention_layer_norm=False, - layer_norm_type="default", - layer_norm_with_affine=True, - attention_layer_norm_with_affine=True, - max_sequence_length=1024, - include_bias=True, - bias_for_layer_norm=None, - scale_logits=False, - vocab_size=50257, - embedding_size=50304, - weight_tying=True, - eos_token_id=50256, - pad_token_id=50256, - **kwargs, - ): - self.d_model = d_model - self.n_heads = n_heads - self.n_layers = n_layers - self.mlp_ratio = mlp_ratio - self.mlp_hidden_size = mlp_hidden_size - self.activation_type = activation_type - self.block_type = block_type - self.block_group_size = block_group_size - self.alibi = alibi - self.alibi_bias_max = alibi_bias_max - self.rope = rope - self.rope_full_precision = rope_full_precision - self.multi_query_attention = multi_query_attention - self.attention_layer_norm = attention_layer_norm - self.layer_norm_type = layer_norm_type - self.layer_norm_with_affine = layer_norm_with_affine - self.attention_layer_norm_with_affine = attention_layer_norm_with_affine - self.max_sequence_length = max_sequence_length - self.include_bias = include_bias - self.bias_for_layer_norm = bias_for_layer_norm - self.scale_logits = scale_logits - self.vocab_size = vocab_size - self.embedding_size = embedding_size - self.weight_tying = weight_tying - self.eos_token_id = eos_token_id - self.pad_token_id = pad_token_id - super().__init__(**kwargs) diff --git a/vllm/transformers_utils/configs/qwen.py b/vllm/transformers_utils/configs/qwen.py deleted file mode 100644 index bb033a337ad04..0000000000000 --- a/vllm/transformers_utils/configs/qwen.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) Alibaba Cloud. 
-# LICENSE: https://huggingface.co/Qwen/Qwen-7B/blob/main/LICENSE - -from transformers import PretrainedConfig - - -class QWenConfig(PretrainedConfig): - model_type = "qwen" - keys_to_ignore_at_inference = ["past_key_values"] - - def __init__( - self, - vocab_size=151936, - hidden_size=4096, - num_hidden_layers=32, - num_attention_heads=32, - emb_dropout_prob=0.0, - attn_dropout_prob=0.0, - layer_norm_epsilon=1e-6, - initializer_range=0.02, - max_position_embeddings=8192, - scale_attn_weights=True, - use_cache=True, - bf16=False, - fp16=False, - fp32=False, - kv_channels=128, - rotary_pct=1.0, - rotary_emb_base=10000, - use_dynamic_ntk=True, - use_logn_attn=True, - use_flash_attn="auto", - intermediate_size=22016, - no_bias=True, - tie_word_embeddings=False, - **kwargs, - ): - self.vocab_size = vocab_size - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.emb_dropout_prob = emb_dropout_prob - self.attn_dropout_prob = attn_dropout_prob - self.layer_norm_epsilon = layer_norm_epsilon - self.initializer_range = initializer_range - self.scale_attn_weights = scale_attn_weights - self.use_cache = use_cache - self.max_position_embeddings = max_position_embeddings - self.bf16 = bf16 - self.fp16 = fp16 - self.fp32 = fp32 - self.kv_channels = kv_channels - self.rotary_pct = rotary_pct - self.rotary_emb_base = rotary_emb_base - self.use_dynamic_ntk = use_dynamic_ntk - self.use_logn_attn = use_logn_attn - self.use_flash_attn = use_flash_attn - self.no_bias = no_bias - super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs) From c1c0d00b88320f97e00a3175fac235a232893da5 Mon Sep 17 00:00:00 2001 From: Roy Date: Tue, 27 Feb 2024 09:33:38 +0800 Subject: [PATCH 017/196] Don't use cupy when `enforce_eager=True` (#3037) --- vllm/engine/llm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c1a75924c6d72..f5b2145c22d6f 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -284,7 +284,10 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", is_driver_worker=True, ) - self._run_workers("init_model", cupy_port=get_open_port()) + # don't use cupy for eager mode + self._run_workers("init_model", + cupy_port=get_open_port() + if not model_config.enforce_eager else None) self._run_workers( "load_model", max_concurrent_workers=self.parallel_config. From 4dd6416faf7cc3035ac3f5c8375eb27e6b0eee80 Mon Sep 17 00:00:00 2001 From: Roy Date: Tue, 27 Feb 2024 10:31:10 +0800 Subject: [PATCH 018/196] Fix stablelm (#3038) --- vllm/model_executor/models/__init__.py | 1 + vllm/model_executor/models/stablelm.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 411814f2f5d09..40b375bb6fbea 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -43,6 +43,7 @@ "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), "RWForCausalLM": ("falcon", "FalconForCausalLM"), "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"), + "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"), } # Models not supported by ROCm. 
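
The stablelm diff that follows reads config attributes through getattr fallbacks so that both the StableLM-Epoch names (rope_pct, norm_eps) and the newer transformers StableLm names (partial_rotary_factor, layer_norm_eps) are accepted. A minimal standalone sketch of that pattern, with SimpleNamespace standing in for the two config variants:

from types import SimpleNamespace

def get_rotary_ndims(config, head_dim: int) -> int:
    # Prefer the legacy attribute, fall back to the newer name, default to full rotary.
    rope_pct = getattr(config, "rope_pct",
                       getattr(config, "partial_rotary_factor", 1))
    return int(head_dim * rope_pct)

legacy_cfg = SimpleNamespace(rope_pct=0.25)
intree_cfg = SimpleNamespace(partial_rotary_factor=0.25)
assert get_rotary_ndims(legacy_cfg, 128) == get_rotary_ndims(intree_cfg, 128) == 32
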
diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index 95e5ad8ede63e..44c57e5a6d4f9 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -94,7 +94,9 @@ def __init__(self, 1, self.total_num_key_value_heads // tp_size) self.head_dim = self.hidden_size // self.total_num_heads self.max_position_embeddings = config.max_position_embeddings - self.rotary_ndims = int(self.head_dim * self.config.rope_pct) + rope_pct = getattr(config, "rope_pct", + getattr(config, "partial_rotary_factor", 1)) + self.rotary_ndims = int(self.head_dim * rope_pct) self.scaling = self.head_dim**-0.5 self.q_size = self.num_heads * self.head_dim self.kv_size = self.num_key_value_heads * self.head_dim @@ -114,7 +116,6 @@ def __init__(self, self.hidden_size, bias=False, linear_method=linear_method) - self.rotary_ndims = int(self.head_dim * self.config.rope_pct) self.rotary_emb = get_rope( self.head_dim, rotary_dim=self.rotary_ndims, @@ -152,10 +153,11 @@ def __init__( super().__init__() self.self_attn = StablelmAttention(config) self.mlp = StablelmMLP(config, linear_method) - self.input_layernorm = nn.LayerNorm(config.hidden_size, - eps=config.norm_eps) + norm_eps = getattr(config, "norm_eps", + getattr(config, "layer_norm_eps", 1e-05)) + self.input_layernorm = nn.LayerNorm(config.hidden_size, eps=norm_eps) self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, - eps=config.norm_eps) + eps=norm_eps) def forward( self, @@ -199,7 +201,9 @@ def __init__(self, StablelmDecoderLayer(config, linear_method) for _ in range(config.num_hidden_layers) ]) - self.norm = nn.LayerNorm(config.hidden_size, eps=config.norm_eps) + norm_eps = getattr(config, "norm_eps", + getattr(config, "layer_norm_eps", 1e-05)) + self.norm = nn.LayerNorm(config.hidden_size, eps=norm_eps) def forward( self, From 48a8f4a7fd18d516ffc0a304219ef722613ea792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=A4=A7=E6=88=90?= <1345739055@qq.com> Date: Tue, 27 Feb 2024 11:17:06 +0800 Subject: [PATCH 019/196] Support Orion model (#2539) Co-authored-by: zhangdacheng Co-authored-by: Woosuk Kwon --- README.md | 1 + docs/source/models/supported_models.rst | 3 + vllm/model_executor/models/__init__.py | 1 + vllm/model_executor/models/orion.py | 322 ++++++++++++++++++++++++ 4 files changed, 327 insertions(+) create mode 100644 vllm/model_executor/models/orion.py diff --git a/README.md b/README.md index 7a16bb1fef044..f771788db2b89 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ vLLM seamlessly supports many Hugging Face models, including the following archi - MPT (`mosaicml/mpt-7b`, `mosaicml/mpt-30b`, etc.) - OLMo (`allenai/OLMo-1B`, `allenai/OLMo-7B`, etc.) - OPT (`facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc.) +- Orion (`OrionStarAI/Orion-14B-Base`, `OrionStarAI/Orion-14B-Chat`, etc.) - Phi (`microsoft/phi-1_5`, `microsoft/phi-2`, etc.) - Qwen (`Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc.) - Qwen2 (`Qwen/Qwen2-7B-beta`, `Qwen/Qwen-7B-Chat-beta`, etc.) diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index c1639ca9e056a..35b548d2737ce 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -71,6 +71,9 @@ Alongside each architecture, we include some popular models that use it. * - :code:`OPTForCausalLM` - OPT, OPT-IML - :code:`facebook/opt-66b`, :code:`facebook/opt-iml-max-30b`, etc. 
+ * - :code:`OrionForCausalLM` + - Orion + - :code:`OrionStarAI/Orion-14B-Base`, :code:`OrionStarAI/Orion-14B-Chat`, etc. * - :code:`PhiForCausalLM` - Phi - :code:`microsoft/phi-1_5`, :code:`microsoft/phi-2`, etc. diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 40b375bb6fbea..66d28207d664f 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -38,6 +38,7 @@ "MPTForCausalLM": ("mpt", "MPTForCausalLM"), "OLMoForCausalLM": ("olmo", "OLMoForCausalLM"), "OPTForCausalLM": ("opt", "OPTForCausalLM"), + "OrionForCausalLM": ("orion", "OrionForCausalLM"), "PhiForCausalLM": ("phi", "PhiForCausalLM"), "QWenLMHeadModel": ("qwen", "QWenLMHeadModel"), "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py new file mode 100644 index 0000000000000..0b067d4fc8802 --- /dev/null +++ b/vllm/model_executor/models/orion.py @@ -0,0 +1,322 @@ +# coding=utf-8 +# Adapted from +# https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/modeling_orion.py +# Copyright (c) OrionStar Inc. +# LICENSE: https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/LICENSE +"""Inference-only Orion-14B model compatible with HuggingFace weights.""" +from typing import Any, Dict, List, Optional, Tuple + +import torch +from torch import nn +from transformers import PretrainedConfig + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.activation import SiluAndMul +from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.linear import (LinearMethodBase, + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear) +from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding, ParallelLMHead) +from vllm.model_executor.parallel_utils.parallel_state import ( + get_tensor_model_parallel_world_size) +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.model_executor.weight_utils import (default_weight_loader, + hf_model_weights_iterator) +from vllm.sequence import SamplerOutput + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class OrionMLP(nn.Module): + + def __init__( + self, + hidden_size: int, + intermediate_size: int, + hidden_act: str, + linear_method: Optional[LinearMethodBase] = None, + ) -> None: + super().__init__() + self.gate_up_proj = MergedColumnParallelLinear( + hidden_size, [intermediate_size] * 2, + bias=False, + linear_method=linear_method) + self.down_proj = RowParallelLinear(intermediate_size, + hidden_size, + bias=False, + linear_method=linear_method) + if hidden_act != "silu": + raise ValueError(f"Unsupported activation: {hidden_act}. 
" + "Only silu is supported for now.") + self.act_fn = SiluAndMul() + + def forward(self, x): + gate_up, _ = self.gate_up_proj(x) + x = self.act_fn(gate_up) + x, _ = self.down_proj(x) + return x + + +class OrionAttention(nn.Module): + + def __init__( + self, + hidden_size: int, + num_heads: int, + num_kv_heads: int, + rope_theta: float = 10000, + rope_scaling: Optional[Dict[str, Any]] = None, + max_position_embeddings: int = 8192, + linear_method: Optional[LinearMethodBase] = None, + ) -> None: + super().__init__() + self.hidden_size = hidden_size + tp_size = get_tensor_model_parallel_world_size() + self.total_num_heads = num_heads + assert self.total_num_heads % tp_size == 0 + self.num_heads = self.total_num_heads // tp_size + self.total_num_kv_heads = num_kv_heads + if self.total_num_kv_heads >= tp_size: + # Number of KV heads is greater than TP size, so we partition + # the KV heads across multiple tensor parallel GPUs. + assert self.total_num_kv_heads % tp_size == 0 + else: + # Number of KV heads is less than TP size, so we replicate + # the KV heads across multiple tensor parallel GPUs. + assert tp_size % self.total_num_kv_heads == 0 + self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) + self.head_dim = hidden_size // self.total_num_heads + self.q_size = self.num_heads * self.head_dim + self.kv_size = self.num_kv_heads * self.head_dim + self.scaling = self.head_dim**-0.5 + self.rope_theta = rope_theta + self.max_position_embeddings = max_position_embeddings + + self.qkv_proj = QKVParallelLinear( + hidden_size, + self.head_dim, + self.total_num_heads, + self.total_num_kv_heads, + bias=False, + linear_method=linear_method, + ) + self.o_proj = RowParallelLinear( + self.total_num_heads * self.head_dim, + hidden_size, + bias=False, + linear_method=linear_method, + ) + + self.rotary_emb = get_rope( + self.head_dim, + rotary_dim=self.head_dim, + max_position=max_position_embeddings, + base=rope_theta, + rope_scaling=rope_scaling, + ) + self.attn = PagedAttention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + ) -> torch.Tensor: + qkv, _ = self.qkv_proj(hidden_states) + q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) + q, k = self.rotary_emb(positions, q, k) + k_cache, v_cache = kv_cache + attn_output = self.attn(q, k, v, k_cache, v_cache, input_metadata) + output, _ = self.o_proj(attn_output) + return output + + +class OrionDecoderLayer(nn.Module): + + def __init__( + self, + config: PretrainedConfig, + linear_method: Optional[LinearMethodBase] = None, + ) -> None: + super().__init__() + self.hidden_size = config.hidden_size + rope_theta = getattr(config, "rope_theta", 10000) + rope_scaling = getattr(config, "rope_scaling", None) + max_position_embeddings = getattr(config, "max_position_embeddings", + 8192) + self.self_attn = OrionAttention( + hidden_size=self.hidden_size, + num_heads=config.num_attention_heads, + num_kv_heads=config.num_key_value_heads, + rope_theta=rope_theta, + rope_scaling=rope_scaling, + max_position_embeddings=max_position_embeddings, + linear_method=linear_method, + ) + self.mlp = OrionMLP( + hidden_size=self.hidden_size, + intermediate_size=config.intermediate_size, + hidden_act=config.hidden_act, + linear_method=linear_method, + ) + + self.input_layernorm = nn.LayerNorm(config.hidden_size, + eps=config.rms_norm_eps) + self.post_attention_layernorm = 
nn.LayerNorm(config.hidden_size, + eps=config.rms_norm_eps) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + residual: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, torch.Tensor]: + # Self Attention + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + hidden_states = self.self_attn( + positions=positions, + hidden_states=hidden_states, + kv_cache=kv_cache, + input_metadata=input_metadata, + ) + + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + return hidden_states, None + + +class OrionModel(nn.Module): + + def __init__( + self, + config: PretrainedConfig, + linear_method: Optional[LinearMethodBase] = None, + ) -> None: + super().__init__() + self.config = config + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + self.embed_tokens = VocabParallelEmbedding( + config.vocab_size, + config.hidden_size, + ) + self.layers = nn.ModuleList([ + OrionDecoderLayer(config, linear_method) + for _ in range(config.num_hidden_layers) + ]) + self.norm = nn.LayerNorm(config.hidden_size, eps=config.rms_norm_eps) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + hidden_states = self.embed_tokens(input_ids) + residual = None + for i in range(len(self.layers)): + layer = self.layers[i] + hidden_states, residual = layer( + positions, + hidden_states, + kv_caches[i], + input_metadata, + residual, + ) + hidden_states = self.norm(hidden_states) + return hidden_states + + +class OrionForCausalLM(nn.Module): + + def __init__( + self, + config: PretrainedConfig, + linear_method: Optional[LinearMethodBase] = None, + ) -> None: + super().__init__() + self.config = config + self.linear_method = linear_method + self.model = OrionModel(config, linear_method) + self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) + self.sampler = Sampler(config.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + hidden_states = self.model(input_ids, positions, kv_caches, + input_metadata) + return hidden_states + + def sample( + self, + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(self.lm_head.weight, hidden_states, + sampling_metadata) + return next_tokens + + def load_weights(self, + model_name_or_path: str, + cache_dir: Optional[str] = None, + load_format: str = "auto", + revision: Optional[str] = None): + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("qkv_proj", "q_proj", "q"), + ("qkv_proj", "k_proj", "k"), + ("qkv_proj", "v_proj", "v"), + ("gate_up_proj", "gate_proj", 0), + ("gate_up_proj", "up_proj", 1), + ] + params_dict = dict(self.named_parameters()) + for name, loaded_weight in hf_model_weights_iterator( + model_name_or_path, cache_dir, load_format, revision): + if "rotary_emb.inv_freq" in name: + continue + if ("rotary_emb.cos_cached" in name + or "rotary_emb.sin_cached" in name): + # Models trained using ColossalAI may include these tensors in + # the checkpoint. Skip them. 
+ continue + for (param_name, weight_name, shard_id) in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) From 2410e320b35cd704059b7c6ba8d8ba7643fe46ee Mon Sep 17 00:00:00 2001 From: Jingru Date: Tue, 27 Feb 2024 11:22:16 +0800 Subject: [PATCH 020/196] fix `get_ip` error in pure ipv6 environment (#2931) --- vllm/utils.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 8ca95e148eb39..c8ac57de6f5f5 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -162,9 +162,16 @@ def _async_wrapper(*args, **kwargs) -> asyncio.Future: def get_ip() -> str: + # try ipv4 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable - return s.getsockname()[0] + try: + s.connect(("dns.google", 80)) # Doesn't need to be reachable + return s.getsockname()[0] + except OSError: + # try ipv6 + s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + s.connect(("dns.google", 80)) + return s.getsockname()[0] def get_distributed_init_method(ip: str, port: int) -> str: @@ -172,9 +179,16 @@ def get_distributed_init_method(ip: str, port: int) -> str: def get_open_port() -> int: - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("", 0)) - return s.getsockname()[1] + # try ipv4 + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return s.getsockname()[1] + except OSError: + # try ipv6 + with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return s.getsockname()[1] def set_cuda_visible_devices(device_ids: List[int]) -> None: From 4bd18ec0c719d2910040e22fa60503fdbfce1332 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 26 Feb 2024 19:44:29 -0800 Subject: [PATCH 021/196] [Minor] Fix type annotation in fused moe (#3045) --- vllm/model_executor/layers/fused_moe/fused_moe.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 830fde6c4eb6d..08e3c2d5b706e 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -2,7 +2,7 @@ import functools import json import os -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple import torch import triton @@ -137,7 +137,7 @@ def fused_moe_kernel( def moe_align_block_size( topk_ids: torch.Tensor, block_size: int, - num_experts: int) -> (torch.Tensor, torch.Tensor, torch.Tensor): + num_experts: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Aligns the token distribution across experts to be compatible with block size for matrix multiplication. 
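The function whose return annotation was just corrected, moe_align_block_size, groups routed tokens by expert and pads each expert's share up to a multiple of block_size so the per-expert GEMMs run on fixed-size tiles. A rough sketch of the padding arithmetic only (not the Triton kernel), assuming a small hypothetical top-k routing result:

    import torch

    topk_ids = torch.tensor([[0, 1], [1, 2], [0, 2]])  # 3 tokens, top-2 experts each
    block_size, num_experts = 4, 4

    counts = torch.bincount(topk_ids.flatten(), minlength=num_experts)
    padded = (counts + block_size - 1) // block_size * block_size
    print(counts.tolist())  # [2, 2, 2, 0]
    print(padded.tolist())  # each expert padded to a block multiple: [4, 4, 4, 0]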
@@ -185,7 +185,8 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, sorted_token_ids: torch.Tensor, expert_ids: torch.Tensor, num_tokens_post_padded: torch.Tensor, - mul_routed_weight: bool, top_k: int, config: dict): + mul_routed_weight: bool, top_k: int, + config: Dict[str, Any]) -> None: assert topk_weights.stride(1) == 1 assert sorted_token_ids.stride(0) == 1 From e0ade06d6305cf84b41c1962cdd9dfdbfee16ac9 Mon Sep 17 00:00:00 2001 From: Dylan Hawk <51147702+dylanwhawk@users.noreply.github.com> Date: Mon, 26 Feb 2024 19:51:53 -0800 Subject: [PATCH 022/196] Support logit bias for OpenAI API (#3027) --- tests/entrypoints/test_openai_server.py | 48 +++++++++++++++++++ vllm/entrypoints/openai/protocol.py | 33 +++++++++++++ vllm/entrypoints/openai/serving_chat.py | 8 +--- vllm/entrypoints/openai/serving_completion.py | 6 +-- 4 files changed, 83 insertions(+), 12 deletions(-) diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index 29d0e6fd537d5..72e2374899793 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -9,6 +9,8 @@ import openai # use the official client for correctness check from huggingface_hub import snapshot_download # downloading lora to test lora requests +from vllm.transformers_utils.tokenizer import get_tokenizer + MAX_SERVER_START_WAIT_S = 600 # wait for server to start for 60 seconds MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" # any model with a chat template should work here LORA_NAME = "typeof/zephyr-7b-beta-lora" # technically this needs Mistral-7B-v0.1 as base, but we're not testing generation quality here @@ -310,5 +312,51 @@ async def test_batch_completions(server, client: openai.AsyncOpenAI, assert texts[0] == texts[1] +async def test_logits_bias(server, client: openai.AsyncOpenAI): + prompt = "Hello, my name is" + max_tokens = 5 + tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME) + + # Test exclusive selection + token_id = 1000 + completion = await client.completions.create( + model=MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=0.0, + logit_bias={str(token_id): 100}, + ) + assert completion.choices[0].text is not None and len( + completion.choices[0].text) >= 5 + response_tokens = tokenizer(completion.choices[0].text, + add_special_tokens=False)["input_ids"] + expected_tokens = tokenizer(tokenizer.decode([token_id] * 5), + add_special_tokens=False)["input_ids"] + assert all([ + response == expected + for response, expected in zip(response_tokens, expected_tokens) + ]) + + # Test ban + completion = await client.completions.create( + model=MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=0.0, + ) + response_tokens = tokenizer(completion.choices[0].text, + add_special_tokens=False)["input_ids"] + first_response = completion.choices[0].text + completion = await client.completions.create( + model=MODEL_NAME, + prompt=prompt, + max_tokens=max_tokens, + temperature=0.0, + logit_bias={str(token): -100 + for token in response_tokens}, + ) + assert first_response != completion.choices[0].text + + if __name__ == "__main__": pytest.main([__file__]) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index f57a2fb775783..e85e7e2b1ede9 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -8,6 +8,8 @@ from vllm.utils import random_uuid from vllm.sampling_params import SamplingParams +import torch + class ErrorResponse(BaseModel): object: str 
= "error" @@ -88,6 +90,21 @@ class ChatCompletionRequest(BaseModel): def to_sampling_params(self) -> SamplingParams: if self.logprobs and not self.top_logprobs: raise ValueError("Top logprobs must be set when logprobs is.") + + logits_processors = None + if self.logit_bias: + + def logit_bias_logits_processor( + token_ids: List[int], + logits: torch.Tensor) -> torch.Tensor: + for token_id, bias in self.logit_bias.items(): + # Clamp the bias between -100 and 100 per OpenAI API spec + bias = min(100, max(-100, bias)) + logits[int(token_id)] += bias + return logits + + logits_processors = [logit_bias_logits_processor] + return SamplingParams( n=self.n, presence_penalty=self.presence_penalty, @@ -111,6 +128,7 @@ def to_sampling_params(self) -> SamplingParams: spaces_between_special_tokens=self.spaces_between_special_tokens, include_stop_str_in_output=self.include_stop_str_in_output, length_penalty=self.length_penalty, + logits_processors=logits_processors, ) @@ -149,6 +167,20 @@ class CompletionRequest(BaseModel): def to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 + logits_processors = None + if self.logit_bias: + + def logit_bias_logits_processor( + token_ids: List[int], + logits: torch.Tensor) -> torch.Tensor: + for token_id, bias in self.logit_bias.items(): + # Clamp the bias between -100 and 100 per OpenAI API spec + bias = min(100, max(-100, bias)) + logits[int(token_id)] += bias + return logits + + logits_processors = [logit_bias_logits_processor] + return SamplingParams( n=self.n, best_of=self.best_of, @@ -172,6 +204,7 @@ def to_sampling_params(self): spaces_between_special_tokens=(self.spaces_between_special_tokens), include_stop_str_in_output=self.include_stop_str_in_output, length_penalty=self.length_penalty, + logits_processors=logits_processors, ) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index dd152583c2329..5635ac6c9e106 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -39,19 +39,13 @@ async def create_chat_completion( See https://platform.openai.com/docs/api-reference/chat/create for the API specification. This API mimics the OpenAI ChatCompletion API. - NOTE: Currently we do not support the following features: + NOTE: Currently we do not support the following feature: - function_call (Users should implement this by themselves) - - logit_bias (to be supported by vLLM engine) """ error_check_ret = await self._check_model(request) if error_check_ret is not None: return error_check_ret - if request.logit_bias is not None and len(request.logit_bias) > 0: - # TODO: support logit_bias in vLLM engine. - return self.create_error_response( - "logit_bias is not currently supported") - try: prompt = self.tokenizer.apply_chat_template( conversation=request.messages, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 667b659f81e9e..610f53549da48 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -264,10 +264,9 @@ async def create_completion(self, request: CompletionRequest, See https://platform.openai.com/docs/api-reference/completions/create for the API specification. This API mimics the OpenAI Completion API. 
- NOTE: Currently we do not support the following features: + NOTE: Currently we do not support the following feature: - suffix (the language models we currently support do not support suffix) - - logit_bias (to be supported by vLLM engine) """ error_check_ret = await self._check_model(request) if error_check_ret is not None: @@ -277,9 +276,6 @@ async def create_completion(self, request: CompletionRequest, if request.suffix is not None: return self.create_error_response( "suffix is not currently supported") - if request.logit_bias is not None and len(request.logit_bias) > 0: - return self.create_error_response( - "logit_bias is not currently supported") model_name = request.model request_id = f"cmpl-{random_uuid()}" From 8b430d7dea5695324636fc458c1cce52213bd499 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 26 Feb 2024 20:23:50 -0800 Subject: [PATCH 023/196] [Minor] Fix StableLMEpochForCausalLM -> StableLmForCausalLM (#3046) --- docs/source/models/supported_models.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index 35b548d2737ce..9d4ec663a16e5 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -83,7 +83,7 @@ Alongside each architecture, we include some popular models that use it. * - :code:`Qwen2ForCausalLM` - Qwen2 - :code:`Qwen/Qwen2-beta-7B`, :code:`Qwen/Qwen2-beta-7B-Chat`, etc. - * - :code:`StableLMEpochForCausalLM` + * - :code:`StableLmForCausalLM` - StableLM - :code:`stabilityai/stablelm-3b-4e1t/` , :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc. From 71bcaf99e2cb2c677bf3a9addb9e8039cbcab22a Mon Sep 17 00:00:00 2001 From: Tao He Date: Tue, 27 Feb 2024 17:14:31 +0800 Subject: [PATCH 024/196] Enable GQA support in the prefix prefill kernels (#3007) Signed-off-by: Tao He --- tests/kernels/test_prefix_prefill.py | 61 +++++++++++++------ vllm/model_executor/layers/attention.py | 34 ++++++----- .../layers/triton_kernel/prefix_prefill.py | 39 ++++++++---- 3 files changed, 87 insertions(+), 47 deletions(-) diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index ac93b32588cca..c068b38a66910 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -8,7 +8,8 @@ from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalCausalFromBottomRightMask -NUM_HEADS = [12] +NUM_HEADS = [64] +NUM_QUERIES_PER_KV = [1, 8, 64] HEAD_SIZES = [128] DTYPES = [torch.float16] CUDA_DEVICES = [ @@ -17,12 +18,14 @@ @pytest.mark.parametrize("num_heads", NUM_HEADS) +@pytest.mark.parametrize("num_queries_per_kv", NUM_HEADS) @pytest.mark.parametrize("head_size", HEAD_SIZES) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("device", CUDA_DEVICES) @torch.inference_mode() def test_contexted_kv_attention( num_heads: int, + num_queries_per_kv: int, head_size: int, dtype: torch.dtype, device: str, @@ -41,28 +44,29 @@ def test_contexted_kv_attention( subquery_lens = [random.randint(16, MAX_SEQ_LEN) for _ in range(BS)] ctx_lens = [random.randint(16, MAX_CTX_LEN) for _ in range(BS)] seq_lens = [a + b for a, b in zip(subquery_lens, ctx_lens)] + num_kv_heads = num_heads // num_queries_per_kv num_tokens = sum(subquery_lens) query = torch.empty(num_tokens, num_heads, head_size, dtype=dtype) query.uniform_(-1e-3, 1e-3) output = torch.empty(num_tokens, num_heads, head_size, dtype=dtype) - kv = torch.empty(sum(seq_lens), 2, num_heads, head_size, 
dtype=dtype) + kv = torch.empty(sum(seq_lens), 2, num_kv_heads, head_size, dtype=dtype) kv.uniform_(-1e-3, 1e-3) key, value = kv.unbind(dim=1) k_cache = torch.zeros(cache_size, block_size, - num_heads, + num_kv_heads, head_size, dtype=dtype) v_cache = torch.zeros(cache_size, block_size, - num_heads, + num_kv_heads, head_size, dtype=dtype) - k = torch.zeros(sum(subquery_lens), num_heads, head_size, dtype=dtype) - v = torch.zeros(sum(subquery_lens), num_heads, head_size, dtype=dtype) + k = torch.zeros(sum(subquery_lens), num_kv_heads, head_size, dtype=dtype) + v = torch.zeros(sum(subquery_lens), num_kv_heads, head_size, dtype=dtype) values = torch.arange(0, cache_size, dtype=torch.long) values = values[torch.randperm(cache_size)] block_table = values[:BS * max_block_per_request].view( @@ -93,19 +97,21 @@ def test_contexted_kv_attention( end_loc = start_loc + block_size start_slot = block_table[i, block_id] * block_size end_slot = start_slot + end_loc - start_loc - k_cache.view(-1, num_heads, head_size)[start_slot:end_slot].copy_( - key[start_loc:end_loc]) - v_cache.view(-1, num_heads, head_size)[start_slot:end_slot].copy_( - value[start_loc:end_loc]) + k_cache.view(-1, num_kv_heads, + head_size)[start_slot:end_slot].copy_( + key[start_loc:end_loc]) + v_cache.view(-1, num_kv_heads, + head_size)[start_slot:end_slot].copy_( + value[start_loc:end_loc]) cur_ctx += block_size block_id += 1 # transpose K_cache[num_blocks, block_size, num_kv_heads, head_size] # to K_cache[num_blocks, num_kv_heads, head_size/8, block_size, 8] - k_cache = k_cache.view(-1, block_size, num_heads, head_size // 8, + k_cache = k_cache.view(-1, block_size, num_kv_heads, head_size // 8, 8).permute(0, 2, 3, 1, 4).contiguous() # transpose V_cache[num_blocks, block_size, num_kv_heads, head_size] # to V_cache[num_blocks, num_kv_heads, head_size, block_size] - v_cache = v_cache.view(-1, block_size, num_heads, + v_cache = v_cache.view(-1, block_size, num_kv_heads, head_size).permute(0, 2, 3, 1).contiguous() # Warm up the Triton kernel by calling it once before actually measuring generation time @@ -123,12 +129,29 @@ def test_contexted_kv_attention( attn_op = xops.fmha.cutlass.FwOp() + if num_kv_heads != num_heads: + # As of Nov 2023, xformers only supports MHA. For MQA/GQA, + # project the key and value tensors to the desired number of + # heads. 
+ # + # see also: vllm/model_executor/layers/attention.py + query = query.view(query.shape[0], num_kv_heads, num_queries_per_kv, + query.shape[-1]) + key = key[:, :, None, :].expand(key.shape[0], num_kv_heads, + num_queries_per_kv, key.shape[-1]) + value = value[:, :, + None, :].expand(value.shape[0], num_kv_heads, + num_queries_per_kv, value.shape[-1]) + query = query.unsqueeze(0) + key = key.unsqueeze(0) + value = value.unsqueeze(0) + attn_bias = BlockDiagonalCausalFromBottomRightMask.from_seqlens( subquery_lens, seq_lens) output_ref = xops.memory_efficient_attention_forward( - query.unsqueeze(0), - key.unsqueeze(0), - value.unsqueeze(0), + query, + key, + value, attn_bias=attn_bias, p=0.0, scale=scale, @@ -137,9 +160,9 @@ def test_contexted_kv_attention( torch.cuda.synchronize() start_time = time.time() output_ref = xops.memory_efficient_attention_forward( - query.unsqueeze(0), - key.unsqueeze(0), - value.unsqueeze(0), + query, + key, + value, attn_bias=attn_bias, p=0.0, scale=scale, @@ -148,5 +171,5 @@ def test_contexted_kv_attention( torch.cuda.synchronize() end_time = time.time() print(f"xformers Time: {(end_time - start_time)*1000:.2f} ms") - output_ref = output_ref.squeeze(0) + output_ref = output_ref.squeeze(0, 2) assert torch.allclose(output_ref, output, atol=1e-6, rtol=0) diff --git a/vllm/model_executor/layers/attention.py b/vllm/model_executor/layers/attention.py index 0622a54db1bc0..2a82325b80213 100644 --- a/vllm/model_executor/layers/attention.py +++ b/vllm/model_executor/layers/attention.py @@ -137,25 +137,27 @@ def forward( ) if input_metadata.is_prompt: - # Prompt run. - if self.num_kv_heads != self.num_heads: - # As of Nov 2023, xformers only supports MHA. For MQA/GQA, - # project the key and value tensors to the desired number of - # heads. - # TODO(woosuk): Use MQA/GQA kernels for higher performance. - query = query.view(query.shape[0], self.num_kv_heads, - self.num_queries_per_kv, query.shape[-1]) - key = key[:, :, - None, :].expand(key.shape[0], self.num_kv_heads, - self.num_queries_per_kv, - key.shape[-1]) - value = value[:, :, None, :].expand(value.shape[0], - self.num_kv_heads, - self.num_queries_per_kv, - value.shape[-1]) # normal attention if (key_cache is None or value_cache is None or input_metadata.block_tables.numel() == 0): + if self.num_kv_heads != self.num_heads: + # As of Nov 2023, xformers only supports MHA. For MQA/GQA, + # project the key and value tensors to the desired number of + # heads. + # TODO(woosuk): Use MQA/GQA kernels for higher performance. + query = query.view(query.shape[0], self.num_kv_heads, + self.num_queries_per_kv, + query.shape[-1]) + key = key[:, :, + None, :].expand(key.shape[0], self.num_kv_heads, + self.num_queries_per_kv, + key.shape[-1]) + value = value[:, :, + None, :].expand(value.shape[0], + self.num_kv_heads, + self.num_queries_per_kv, + value.shape[-1]) + # Set attention bias if not provided. This typically happens at # the very attention layer of every iteration. # FIXME(woosuk): This is a hack. 
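The prefix_prefill.py changes below give the kernel the same query-to-KV-head mapping: under grouped-query attention there are fewer KV heads than query heads, and query head h reads KV head h // num_queries_per_kv. A minimal sketch of that mapping outside the kernel, with made-up shapes:

    import torch

    num_heads, num_kv_heads, head_size, num_tokens = 8, 2, 4, 3
    num_queries_per_kv = num_heads // num_kv_heads

    k = torch.randn(num_tokens, num_kv_heads, head_size)

    # Each query head h attends against KV head h // num_queries_per_kv.
    kv_head_for_query = torch.arange(num_heads) // num_queries_per_kv
    k_expanded = k[:, kv_head_for_query, :]        # (num_tokens, num_heads, head_size)
    assert torch.equal(k_expanded[:, 3], k[:, 0])  # query head 3 -> KV head 0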
diff --git a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py index a1a2ab0c4805c..70f09224f1cf6 100644 --- a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py +++ b/vllm/model_executor/layers/triton_kernel/prefix_prefill.py @@ -45,6 +45,7 @@ def _fwd_kernel( stride_v_cache_h, stride_v_cache_d, stride_v_cache_bl, + num_queries_per_kv: int, BLOCK_M: tl.constexpr, BLOCK_DMODEL: tl.constexpr, BLOCK_N: tl.constexpr, @@ -53,6 +54,8 @@ def _fwd_kernel( cur_head = tl.program_id(1) start_m = tl.program_id(2) + cur_kv_head = cur_head // num_queries_per_kv + cur_batch_ctx_len = tl.load(B_Ctxlen + cur_batch) cur_batch_seq_len = tl.load(B_Seqlen + cur_batch) cur_batch_in_all_start_index = tl.load(B_Start_Loc + cur_batch) @@ -85,13 +88,14 @@ def _fwd_kernel( mask=(start_n + offs_n) < cur_batch_ctx_len, other=0) off_k = (bn[None, :] * stride_k_cache_bs + - cur_head * stride_k_cache_h + + cur_kv_head * stride_k_cache_h + (offs_d[:, None] // x) * stride_k_cache_d + ((start_n + offs_n[None, :]) % block_size) * stride_k_cache_bl + (offs_d[:, None] % x) * stride_k_cache_x) off_v = ( - bn[:, None] * stride_v_cache_bs + cur_head * stride_v_cache_h + + bn[:, None] * stride_v_cache_bs + + cur_kv_head * stride_v_cache_h + offs_d[None, :] * stride_v_cache_d + (start_n + offs_n[:, None]) % block_size * stride_v_cache_bl) k = tl.load(K_cache + off_k, @@ -131,9 +135,9 @@ def _fwd_kernel( l_i = l_i_new m_i = m_i_new - off_k = (offs_n[None, :] * stride_kbs + cur_head * stride_kh + + off_k = (offs_n[None, :] * stride_kbs + cur_kv_head * stride_kh + offs_d[:, None] * stride_kd) - off_v = (offs_n[:, None] * stride_vbs + cur_head * stride_vh + + off_v = (offs_n[:, None] * stride_vbs + cur_kv_head * stride_vh + offs_d[None, :] * stride_vd) k_ptrs = K + off_k v_ptrs = V + off_v @@ -232,6 +236,7 @@ def _fwd_kernel_flash_attn_v2( stride_v_cache_h, stride_v_cache_d, stride_v_cache_bl, + num_queries_per_kv: int, BLOCK_M: tl.constexpr, BLOCK_DMODEL: tl.constexpr, BLOCK_N: tl.constexpr, @@ -240,6 +245,8 @@ def _fwd_kernel_flash_attn_v2( cur_head = tl.program_id(1) start_m = tl.program_id(2) + cur_kv_head = cur_head // num_queries_per_kv + cur_batch_ctx_len = tl.load(B_Ctxlen + cur_batch) cur_batch_seq_len = tl.load(B_Seqlen + cur_batch) cur_batch_in_all_start_index = tl.load(B_Start_Loc + cur_batch) @@ -272,13 +279,14 @@ def _fwd_kernel_flash_attn_v2( mask=(start_n + offs_n) < cur_batch_ctx_len, other=0) off_k = (bn[None, :] * stride_k_cache_bs + - cur_head * stride_k_cache_h + + cur_kv_head * stride_k_cache_h + (offs_d[:, None] // x) * stride_k_cache_d + ((start_n + offs_n[None, :]) % block_size) * stride_k_cache_bl + (offs_d[:, None] % x) * stride_k_cache_x) off_v = ( - bn[:, None] * stride_v_cache_bs + cur_head * stride_v_cache_h + + bn[:, None] * stride_v_cache_bs + + cur_kv_head * stride_v_cache_h + offs_d[None, :] * stride_v_cache_d + (start_n + offs_n[:, None]) % block_size * stride_v_cache_bl) k = tl.load(K_cache + off_k, @@ -317,9 +325,9 @@ def _fwd_kernel_flash_attn_v2( l_i = l_i_new m_i = m_i_new - off_k = (offs_n[None, :] * stride_kbs + cur_head * stride_kh + + off_k = (offs_n[None, :] * stride_kbs + cur_kv_head * stride_kh + offs_d[:, None] * stride_kd) - off_v = (offs_n[:, None] * stride_vbs + cur_head * stride_vh + + off_v = (offs_n[:, None] * stride_vbs + cur_kv_head * stride_vh + offs_d[None, :] * stride_vd) k_ptrs = K + off_k v_ptrs = V + off_v @@ -420,6 +428,7 @@ def _fwd_kernel_alibi( stride_v_cache_h, stride_v_cache_d, 
stride_v_cache_bl, + num_queries_per_kv: int, BLOCK_M: tl.constexpr, BLOCK_DMODEL: tl.constexpr, BLOCK_N: tl.constexpr, @@ -429,6 +438,8 @@ def _fwd_kernel_alibi( cur_head = tl.program_id(1) start_m = tl.program_id(2) + cur_kv_head = cur_head // num_queries_per_kv + # cur_batch_seq_len: the length of prompts # cur_batch_ctx_len: the length of prefix # cur_batch_in_all_start_index: the start id of the dim=0 @@ -468,13 +479,14 @@ def _fwd_kernel_alibi( mask=(start_n + offs_n) < cur_batch_ctx_len, other=0) off_k = (bn[None, :] * stride_k_cache_bs + - cur_head * stride_k_cache_h + + cur_kv_head * stride_k_cache_h + (offs_d[:, None] // x) * stride_k_cache_d + ((start_n + offs_n[None, :]) % block_size) * stride_k_cache_bl + (offs_d[:, None] % x) * stride_k_cache_x) off_v = ( - bn[:, None] * stride_v_cache_bs + cur_head * stride_v_cache_h + + bn[:, None] * stride_v_cache_bs + + cur_kv_head * stride_v_cache_h + offs_d[None, :] * stride_v_cache_d + (start_n + offs_n[:, None]) % block_size * stride_v_cache_bl) k = tl.load(K_cache + off_k, @@ -522,9 +534,9 @@ def _fwd_kernel_alibi( l_i = l_i_new m_i = m_i_new - off_k = (offs_n[None, :] * stride_kbs + cur_head * stride_kh + + off_k = (offs_n[None, :] * stride_kbs + cur_kv_head * stride_kh + offs_d[:, None] * stride_kd) - off_v = (offs_n[:, None] * stride_vbs + cur_head * stride_vh + + off_v = (offs_n[:, None] * stride_vbs + cur_kv_head * stride_vh + offs_d[None, :] * stride_vd) k_ptrs = K + off_k v_ptrs = V + off_v @@ -628,6 +640,7 @@ def context_attention_fwd(q, sm_scale = 1.0 / (Lq**0.5) batch, head = b_seq_len.shape[0], q.shape[1] + num_queries_per_kv = q.shape[1] // k.shape[1] grid = (batch, head, triton.cdiv(max_input_len, BLOCK)) # batch, head, @@ -674,6 +687,7 @@ def context_attention_fwd(q, v_cache.stride(2), v_cache.stride( 3), #[num_blocks, num_kv_heads, head_size, block_size] + num_queries_per_kv=num_queries_per_kv, BLOCK_M=BLOCK, BLOCK_DMODEL=Lk, BLOCK_N=BLOCK, @@ -721,6 +735,7 @@ def context_attention_fwd(q, v_cache.stride(2), v_cache.stride( 3), #[num_blocks, num_kv_heads, head_size, block_size] + num_queries_per_kv=num_queries_per_kv, BLOCK_M=BLOCK, BLOCK_DMODEL=Lk, BLOCK_N=BLOCK, From a8683102cc0ab9c1a0c3ae1ba2b7954f78eba1b3 Mon Sep 17 00:00:00 2001 From: Ganesh Jagadeesan Date: Wed, 28 Feb 2024 00:26:15 -0500 Subject: [PATCH 025/196] multi-lora documentation fix (#3064) --- docs/source/models/lora.rst | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/source/models/lora.rst b/docs/source/models/lora.rst index 1910f26506611..21b18c75fc552 100644 --- a/docs/source/models/lora.rst +++ b/docs/source/models/lora.rst @@ -58,7 +58,7 @@ LoRA adapted models can also be served with the Open-AI compatible vLLM server. .. code-block:: bash - python -m vllm.entrypoints.api_server \ + python -m vllm.entrypoints.openai.api_server \ --model meta-llama/Llama-2-7b-hf \ --enable-lora \ --lora-modules sql-lora=~/.cache/huggingface/hub/models--yard1--llama-2-7b-sql-lora-test/ @@ -89,3 +89,15 @@ with its base model: Requests can specify the LoRA adapter as if it were any other model via the ``model`` request parameter. The requests will be processed according to the server-wide LoRA configuration (i.e. in parallel with base model requests, and potentially other LoRA adapter requests if they were provided and ``max_loras`` is set high enough). + +The following is an example request + +.. 
code-block::bash + curl http://localhost:8000/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "sql-lora", + "prompt": "San Francisco is a", + "max_tokens": 7, + "temperature": 0 + }' | jq From e46fa5d52e02ee48d5fdd12b35e39993008b4bd6 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Wed, 28 Feb 2024 13:38:26 +0800 Subject: [PATCH 026/196] Restrict prometheus_client >= 0.18.0 to prevent errors when importing pkgs (#3070) --- requirements-neuron.txt | 2 +- requirements-rocm.txt | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-neuron.txt b/requirements-neuron.txt index 36e629add664d..858472c20ca8c 100644 --- a/requirements-neuron.txt +++ b/requirements-neuron.txt @@ -6,4 +6,4 @@ neuronx-cc fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -prometheus_client +prometheus_client >= 0.18.0 diff --git a/requirements-rocm.txt b/requirements-rocm.txt index e759ba7d028d9..53bd11de7c9de 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -10,4 +10,4 @@ transformers >= 4.38.0 # Required for Gemma. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -prometheus_client +prometheus_client >= 0.18.0 diff --git a/requirements.txt b/requirements.txt index de93ba6354cda..d4599ec95d945 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ xformers == 0.0.23.post1 # Required for CUDA 12.1. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. -prometheus_client +prometheus_client >= 0.18.0 pynvml == 11.5.0 triton >= 2.1.0 cupy-cuda12x == 12.1.0 # Required for CUDA graphs. CUDA 11.8 users should install cupy-cuda11x instead. From 3b7178cfa4a317922d4aef9dd3b2647b8d950e7d Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Wed, 28 Feb 2024 09:34:34 -0800 Subject: [PATCH 027/196] [Neuron] Support inference with transformers-neuronx (#2569) --- examples/offline_inference_neuron.py | 33 ++++ tests/lora/conftest.py | 8 +- vllm/config.py | 41 ++++- vllm/engine/arg_utils.py | 16 +- vllm/engine/llm_engine.py | 21 ++- vllm/lora/layers.py | 4 + vllm/model_executor/__init__.py | 3 +- vllm/model_executor/layers/sampler.py | 18 +- vllm/model_executor/model_loader.py | 10 +- vllm/model_executor/models/__init__.py | 12 +- vllm/model_executor/models/neuron/llama.py | 79 +++++++++ vllm/model_executor/neuron_model_loader.py | 66 +++++++ vllm/model_executor/sampling_metadata.py | 4 +- vllm/model_executor/utils.py | 17 ++ vllm/utils.py | 8 + vllm/worker/cache_engine.py | 11 +- vllm/worker/model_runner.py | 16 +- vllm/worker/neuron_worker.py | 191 +++++++++++++++++++++ 18 files changed, 516 insertions(+), 42 deletions(-) create mode 100644 examples/offline_inference_neuron.py create mode 100644 vllm/model_executor/models/neuron/llama.py create mode 100644 vllm/model_executor/neuron_model_loader.py create mode 100644 vllm/worker/neuron_worker.py diff --git a/examples/offline_inference_neuron.py b/examples/offline_inference_neuron.py new file mode 100644 index 0000000000000..9b9dc4d94892f --- /dev/null +++ b/examples/offline_inference_neuron.py @@ -0,0 +1,33 @@ +from vllm import LLM, SamplingParams + +# Sample prompts. +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", +] +# Create a sampling params object. +sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + +# Create an LLM. 
+llm = LLM( + model="openlm-research/open_llama_3b", + max_num_seqs=8, + # The max_model_len and block_size arguments are required to be same as max sequence length, + # when targeting neuron device. Currently, this is a known limitation in continuous batching + # support in transformers-neuronx. + # TODO(liangfu): Support paged-attention in transformers-neuronx. + max_model_len=128, + block_size=128, + # The device can be automatically detected when AWS Neuron SDK is installed. + # The device argument can be either unspecified for automated detection, or explicitly assigned. + device="neuron") +# Generate texts from the prompts. The output is a list of RequestOutput objects +# that contain the prompt, generated text, and other information. +outputs = llm.generate(prompts, sampling_params) +# Print the outputs. +for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py index 0ca0715334c25..75f4e41290c36 100644 --- a/tests/lora/conftest.py +++ b/tests/lora/conftest.py @@ -131,9 +131,11 @@ def llama_2_7b_engine_extra_embeddings() -> nn.Module: cleanup() get_model_old = get_model - def get_model_patched(model_config, device_config, lora_config=None): - return get_model_old(model_config, device_config, - LoRAConfig(max_loras=4, max_lora_rank=8)) + def get_model_patched(model_config, device_config, **kwargs): + return get_model_old(model_config, + device_config, + lora_config=LoRAConfig(max_loras=4, + max_lora_rank=8)) with patch("vllm.worker.model_runner.get_model", get_model_patched): engine = vllm.LLM("meta-llama/Llama-2-7b-hf", enable_lora=False) diff --git a/vllm/config.py b/vllm/config.py index bd0dc89b585f7..fc848b72d7f2a 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -8,7 +8,7 @@ from vllm.logger import init_logger from vllm.transformers_utils.config import get_config -from vllm.utils import get_cpu_memory, is_hip, get_nvcc_cuda_version +from vllm.utils import get_cpu_memory, is_hip, is_neuron, get_nvcc_cuda_version logger = init_logger(__name__) @@ -380,13 +380,21 @@ def __init__( disable_custom_all_reduce: bool = False, ) -> None: self.pipeline_parallel_size = pipeline_parallel_size - self.tensor_parallel_size = tensor_parallel_size + if is_neuron(): + # For Neuron device support, here we assign TP=1 to avoid sharding within vLLM directly. + # Transformer-neuronx would take neuron_tp_degree attribute, and distribute the workload + # to multiple NeuronCores. + self.tensor_parallel_size = 1 + self.neuron_tp_degree = tensor_parallel_size + else: + self.tensor_parallel_size = tensor_parallel_size self.worker_use_ray = worker_use_ray self.max_parallel_loading_workers = max_parallel_loading_workers self.disable_custom_all_reduce = disable_custom_all_reduce - self.world_size = pipeline_parallel_size * tensor_parallel_size - if self.world_size > 1: + self.world_size = pipeline_parallel_size * self.tensor_parallel_size + # Ray worker is not supported for Neuron backend. 
+ if self.world_size > 1 and not is_neuron(): self.worker_use_ray = True self._verify_args() @@ -465,8 +473,29 @@ def _verify_args(self) -> None: class DeviceConfig: - def __init__(self, device: str = "cuda") -> None: - self.device = torch.device(device) + def __init__(self, device: str = "auto") -> None: + if device == "auto": + # Automated device type detection + if torch.cuda.is_available(): + self.device_type = "cuda" + elif is_neuron(): + self.device_type = "neuron" + else: + raise RuntimeError("No supported device detected.") + else: + # Device type is assigned explicitly + self.device_type = device + + # Some device types require processing inputs on CPU + if self.device_type in ["neuron"]: + self.device = torch.device("cpu") + else: + # Set device with device type + self.device = torch.device(self.device_type) + + @property + def is_neuron(self): + return self.device_type == "neuron" @dataclass diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index a4efd171b871d..c01e7311fb89a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -44,7 +44,7 @@ class EngineArgs: lora_extra_vocab_size: int = 256 lora_dtype = 'auto' max_cpu_loras: Optional[int] = None - device: str = 'cuda' + device: str = 'auto' def __post_init__(self): if self.tokenizer is None: @@ -171,7 +171,7 @@ def add_cli_args( parser.add_argument('--block-size', type=int, default=EngineArgs.block_size, - choices=[8, 16, 32], + choices=[8, 16, 32, 128], help='token block size') parser.add_argument('--seed', type=int, @@ -264,13 +264,11 @@ def add_cli_args( help=('Maximum number of LoRAs to store in CPU memory. ' 'Must be >= than max_num_seqs. ' 'Defaults to max_num_seqs.')) - parser.add_argument( - "--device", - type=str, - default=EngineArgs.device, - choices=["cuda"], - help=('Device type for vLLM execution. ' - 'Currently, only CUDA-compatible devices are supported.')) + parser.add_argument("--device", + type=str, + default=EngineArgs.device, + choices=["auto", "cuda", "neuron"], + help='Device type for vLLM execution.') return parser @classmethod diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f5b2145c22d6f..f0fd7efdef813 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -3,6 +3,7 @@ import os import time import pickle +import importlib from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union) @@ -20,7 +21,8 @@ SequenceGroupOutput, SequenceOutput, SequenceStatus) from vllm.transformers_utils.tokenizer import (detokenize_incrementally, TokenizerGroup) -from vllm.utils import Counter, set_cuda_visible_devices, get_ip, get_open_port, get_distributed_init_method +from vllm.utils import (Counter, set_cuda_visible_devices, get_ip, + get_open_port, get_distributed_init_method) if ray: from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy @@ -31,6 +33,12 @@ logger = init_logger(__name__) _LOCAL_LOGGING_INTERVAL_SEC = 5 +# A map between the device type (in device config) to its worker module. +DEVICE_TO_WORKER_MODULE_MAP = { + "cuda": "vllm.worker.worker", + "neuron": "vllm.worker.neuron_worker", +} + # If the env var is set, it uses the Ray's compiled DAG API # which optimizes the control plane overhead. # Run VLLM with VLLM_USE_RAY_COMPILED_DAG=1 to enable it. 
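The next hunk routes worker construction through this device map instead of importing vllm.worker.worker directly, so the Neuron backend can supply its own Worker class. A small sketch of the lazy dispatch pattern, with a hypothetical module map mirroring DEVICE_TO_WORKER_MODULE_MAP above:

    import importlib

    # Hypothetical map for illustration; real keys live in llm_engine.py.
    _WORKER_MODULES = {
        "cuda": "vllm.worker.worker",
        "neuron": "vllm.worker.neuron_worker",
    }

    def dispatch_worker(device_type: str):
        # Import lazily so CUDA-only modules are never loaded on Neuron hosts.
        module = importlib.import_module(_WORKER_MODULES[device_type])
        return module.Worker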
@@ -138,10 +146,17 @@ def __init__( def get_tokenizer_for_seq(self, sequence: Sequence): return self.tokenizer.get_lora_tokenizer(sequence.lora_request) + def _dispatch_worker(self): + worker_module = DEVICE_TO_WORKER_MODULE_MAP[ + self.device_config.device_type] + imported_worker = importlib.import_module(worker_module) + Worker = imported_worker.Worker + return Worker + def _init_workers(self): # Lazy import the Worker to avoid importing torch.cuda/xformers # before CUDA_VISIBLE_DEVICES is set in the Worker - from vllm.worker.worker import Worker + Worker = self._dispatch_worker() assert self.parallel_config.world_size == 1, ( "Ray is required if parallel_config.world_size > 1.") @@ -243,7 +258,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", # Lazy import the Worker to avoid importing torch.cuda/xformers # before CUDA_VISIBLE_DEVICES is set in the Worker - from vllm.worker.worker import Worker + Worker = self._dispatch_worker() # Initialize torch distributed process group for the workers. model_config = copy.deepcopy(self.model_config) diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py index e1aac20b038b4..e667d70f71e39 100644 --- a/vllm/lora/layers.py +++ b/vllm/lora/layers.py @@ -795,6 +795,10 @@ def __init__( self.dtype = dtype self.device = device + @property + def logits_as_hidden_states(self): + return self.base_layer.logits_as_hidden_states + @property def vocab_size(self): return self.base_layer.vocab_size diff --git a/vllm/model_executor/__init__.py b/vllm/model_executor/__init__.py index 0d5b2004ad7cb..cd6dbde5f54cf 100644 --- a/vllm/model_executor/__init__.py +++ b/vllm/model_executor/__init__.py @@ -1,7 +1,6 @@ from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.model_loader import get_model from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.utils import set_random_seed +from vllm.model_executor.utils import set_random_seed, get_model __all__ = [ "InputMetadata", diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 884d84387e505..71655b216fb3d 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -10,6 +10,7 @@ from vllm.sampling_params import SamplingParams, SamplingType from vllm.sequence import (PromptLogprobs, SampleLogprobs, SamplerOutput, SequenceData, SequenceGroupOutput, SequenceOutput) +from vllm.utils import is_neuron class Sampler(nn.Module): @@ -32,6 +33,8 @@ def __init__(self, org_vocab_size: Optional[int] = None) -> None: super().__init__() self.vocab_size = vocab_size + # Transformers-neuronx generate outputs as logits directly. + self.logits_as_hidden_states = is_neuron() # original vocabulary size (without LoRA). self.org_vocab_size = org_vocab_size or vocab_size @@ -55,10 +58,14 @@ def forward( embedding_bias: Optional[torch.Tensor] = None, ) -> Optional[SamplerOutput]: # Get the hidden states that we use for sampling. - hidden_states = _prune_hidden_states(hidden_states, sampling_metadata) + if self.logits_as_hidden_states: + logits = hidden_states + else: + hidden_states = _prune_hidden_states(hidden_states, + sampling_metadata) - # Get the logits for the next tokens. - logits = self._get_logits(hidden_states, embedding, embedding_bias) + # Get the logits for the next tokens. + logits = self._get_logits(hidden_states, embedding, embedding_bias) # Only perform sampling in the driver worker. 
# Note: `_get_logits` is still distributed across TP workers because @@ -395,7 +402,8 @@ def _sample( sample_metadata[sampling_type] = (seq_group_ids, seq_groups, is_prompts, sample_indices) if sampling_type == SamplingType.GREEDY: - greedy_samples = torch.argmax(logprobs[sample_indices], dim=-1) + greedy_samples = torch.argmax(logprobs[sample_indices.long()], + dim=-1) elif sampling_type in (SamplingType.RANDOM, SamplingType.RANDOM_SEED): max_best_of = 1 for seq_group, is_prompt in zip(seq_groups, is_prompts): @@ -407,7 +415,7 @@ def _sample( "generators": sampling_metadata.generators, } multinomial_samples[sampling_type] = _multinomial( - probs[sample_indices], max_best_of, **seeded_args) + probs[sample_indices.long()], max_best_of, **seeded_args) elif sampling_type == SamplingType.BEAM: beam_search_logprobs = logprobs[sample_indices] else: diff --git a/vllm/model_executor/model_loader.py b/vllm/model_executor/model_loader.py index ebe092b5d62ba..cb64d80c8147d 100644 --- a/vllm/model_executor/model_loader.py +++ b/vllm/model_executor/model_loader.py @@ -1,11 +1,11 @@ """Utilities for selecting and loading models.""" import contextlib -from typing import Optional, Type +from typing import Type import torch import torch.nn as nn -from vllm.config import DeviceConfig, ModelConfig, LoRAConfig +from vllm.config import DeviceConfig, ModelConfig from vllm.model_executor.models import ModelRegistry from vllm.model_executor.weight_utils import (get_quant_config, initialize_dummy_weights) @@ -37,9 +37,9 @@ def _get_model_architecture(model_config: ModelConfig) -> Type[nn.Module]: f"Supported architectures: {ModelRegistry.get_supported_archs()}") -def get_model(model_config: ModelConfig, - device_config: DeviceConfig, - lora_config: Optional[LoRAConfig] = None) -> nn.Module: +def get_model(model_config: ModelConfig, device_config: DeviceConfig, + **kwargs) -> nn.Module: + lora_config = kwargs.get("lora_config", None) model_class = _get_model_architecture(model_config) # Get the (maybe quantized) linear method. diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 66d28207d664f..e4f3a785cd99a 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -4,7 +4,7 @@ import torch.nn as nn from vllm.logger import init_logger -from vllm.utils import is_hip +from vllm.utils import is_hip, is_neuron logger = init_logger(__name__) @@ -61,6 +61,9 @@ "Sliding window attention is not yet supported in ROCm's flash attention", } +# Models not supported by Neuron. 
+_NEURON_SUPPORTED_MODELS = {"LlamaForCausalLM": "neuron.llama"} + class ModelRegistry: @@ -77,8 +80,15 @@ def load_model_cls(model_arch: str) -> Optional[Type[nn.Module]]: logger.warning( f"Model architecture {model_arch} is partially supported " "by ROCm: " + _ROCM_PARTIALLY_SUPPORTED_MODELS[model_arch]) + elif is_neuron(): + if model_arch not in _NEURON_SUPPORTED_MODELS: + raise ValueError( + f"Model architecture {model_arch} is not supported by " + "Neuron for now.") module_name, model_cls_name = _MODELS[model_arch] + if is_neuron(): + module_name = _NEURON_SUPPORTED_MODELS[model_arch] module = importlib.import_module( f"vllm.model_executor.models.{module_name}") return getattr(module, model_cls_name, None) diff --git a/vllm/model_executor/models/neuron/llama.py b/vllm/model_executor/models/neuron/llama.py new file mode 100644 index 0000000000000..e2856da99d9b1 --- /dev/null +++ b/vllm/model_executor/models/neuron/llama.py @@ -0,0 +1,79 @@ +"""Inference-only LLaMA model compatible with HuggingFace weights.""" +import os +from typing import List, Optional, Tuple + +import torch +from torch import nn +from transformers import LlamaConfig + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.sequence import SamplerOutput + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class LlamaForCausalLM(nn.Module): + + def __init__( + self, + config: LlamaConfig, + linear_method=None, + ) -> None: + super().__init__() + self.config = config + self.linear_method = linear_method + self.model = None + self.sampler = Sampler(config.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + with torch.inference_mode(): + block_size = self.model.context_buckets[-1] + if input_metadata.is_prompt: + seq_ids = input_metadata.slot_mapping[:, 0] // block_size + else: + seq_ids = input_metadata.block_tables + logits = self.model(input_ids, + cache_ids=positions, + start_ids=seq_ids.flatten()) + return logits + + def sample( + self, + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(self.model.chkpt_model.lm_head, + hidden_states, sampling_metadata) + return next_tokens + + def load_weights(self, + model_name_or_path: str, + cache_dir: Optional[str] = None, + load_format: str = "auto", + revision: Optional[str] = None, + **kwargs): + from transformers_neuronx.llama.model import LlamaForSampling + + split_model_dir = f"{model_name_or_path}-split" + if os.path.isdir(os.path.join(model_name_or_path, + "pytorch_model.bin")): + split_model_dir = model_name_or_path + elif not os.path.exists(f"{model_name_or_path}-split"): + from transformers.models.llama import LlamaForCausalLM + from transformers_neuronx.module import save_pretrained_split + + hf_model = LlamaForCausalLM.from_pretrained(model_name_or_path, + low_cpu_mem_usage=True) + save_pretrained_split(hf_model, f"{model_name_or_path}-split") + + self.model = LlamaForSampling.from_pretrained(split_model_dir, + **kwargs) + self.model.to_neuron() diff --git a/vllm/model_executor/neuron_model_loader.py b/vllm/model_executor/neuron_model_loader.py new file mode 100644 index 0000000000000..b8d63d4ff12fc --- /dev/null +++ b/vllm/model_executor/neuron_model_loader.py @@ -0,0 +1,66 @@ +"""Utilities for selecting and loading models.""" 
+from typing import Type + +import torch +import torch.nn as nn +from transformers import PretrainedConfig + +from vllm.config import ModelConfig, DeviceConfig +from vllm.model_executor.models import ModelRegistry + +TORCH_DTYPE_TO_NEURON_AMP = { + "auto": "f32", + "half": "f16", + "float16": "f16", + "bfloat16": "bf16", + "float": "f32", + "float32": "f32", + torch.float16: "f16", + torch.bfloat16: "bf16", + torch.float32: "f32", +} + + +def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]: + architectures = getattr(config, "architectures", []) + for arch in architectures: + model_cls = ModelRegistry.load_model_cls(arch) + if model_cls is not None: + return model_cls + raise ValueError( + f"Model architectures {architectures} are not supported for now. " + f"Supported architectures: {ModelRegistry.get_supported_archs()}") + + +def get_model(model_config: ModelConfig, device_config: DeviceConfig, + **kwargs) -> nn.Module: + from transformers_neuronx.config import NeuronConfig, ContinuousBatchingConfig + + parallel_config = kwargs.get("parallel_config") + scheduler_config = kwargs.get("scheduler_config") + + model_class = _get_model_architecture(model_config.hf_config) + linear_method = None + + # Create a model instance. + model = model_class(model_config.hf_config, linear_method) + + continuous_batching_config = ContinuousBatchingConfig( + batch_size_for_shared_caches=scheduler_config.max_num_seqs) + neuron_config = NeuronConfig( + continuous_batching=continuous_batching_config) + + # Load the weights from the cached or downloaded files. + model.load_weights( + model_config.model, + model_config.download_dir, + model_config.load_format, + model_config.revision, + tp_degree=parallel_config.neuron_tp_degree, + amp=TORCH_DTYPE_TO_NEURON_AMP[model_config.dtype], + neuron_config=neuron_config, + context_length_estimate=[scheduler_config.max_model_len], + n_positions=[scheduler_config.max_model_len], + batch_size=scheduler_config.max_num_seqs) + + return model.eval() diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index d0ffeecd2d74d..7deb80801856e 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -5,7 +5,7 @@ from vllm.sampling_params import SamplingParams, SamplingType from vllm.sequence import SequenceData -from vllm.utils import in_wsl +from vllm.utils import in_wsl, is_neuron _SAMPLING_EPS = 1e-5 @@ -155,7 +155,7 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], dtype: torch.dtype) -> "SamplingTensors": # Note that the performance will be very bad without # pinned memory. 
- pin_memory = not in_wsl() + pin_memory = not in_wsl() and not is_neuron() prompt_max_len = max(len(tokens) for tokens in prompt_tokens) prompt_padded_tokens = [ tokens + [vocab_size] * (prompt_max_len - len(tokens)) diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py index 336bc1cd005cf..0113e3edf0675 100644 --- a/vllm/model_executor/utils.py +++ b/vllm/model_executor/utils.py @@ -1,10 +1,18 @@ """Utils for model executor.""" import random +import importlib from typing import Any, Dict, Optional import numpy as np import torch +from vllm.config import DeviceConfig, ModelConfig + +DEVICE_TO_MODEL_LOADER_MAP = { + "cuda": "model_loader", + "neuron": "neuron_model_loader", +} + def set_random_seed(seed: int) -> None: random.seed(seed) @@ -33,3 +41,12 @@ def set_weight_attrs( assert not hasattr( weight, key), (f"Overwriting existing tensor attribute: {key}") setattr(weight, key, value) + + +def get_model(model_config: ModelConfig, device_config: DeviceConfig, + **kwargs) -> torch.nn.Module: + model_loader_module = DEVICE_TO_MODEL_LOADER_MAP[device_config.device_type] + imported_model_loader = importlib.import_module( + f"vllm.model_executor.{model_loader_module}") + get_model_fn = imported_model_loader.get_model + return get_model_fn(model_config, device_config, **kwargs) diff --git a/vllm/utils.py b/vllm/utils.py index c8ac57de6f5f5..a4f9bfe6aac99 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -118,6 +118,14 @@ def is_hip() -> bool: return torch.version.hip is not None +def is_neuron() -> bool: + try: + import transformers_neuronx + except ImportError: + transformers_neuronx = None + return transformers_neuronx is not None + + def get_max_shared_memory_bytes(gpu: int = 0) -> int: """Returns the maximum shared memory per thread block in bytes.""" # NOTE: This import statement should be executed lazily since diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index bbe33989fc2a4..880299783935c 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -3,10 +3,9 @@ import torch -from vllm._C import cache_ops from vllm.config import CacheConfig, ModelConfig, ParallelConfig from vllm.logger import init_logger -from vllm.utils import in_wsl, STR_DTYPE_TO_TORCH_DTYPE +from vllm.utils import in_wsl, is_neuron, STR_DTYPE_TO_TORCH_DTYPE logger = init_logger(__name__) @@ -39,6 +38,10 @@ def __init__( self.num_gpu_blocks = cache_config.num_gpu_blocks self.num_cpu_blocks = cache_config.num_cpu_blocks + # Skip initializing CUDA stream and buffer for Neuron backend. + if is_neuron(): + return + if cache_config.cache_dtype == "auto": self.dtype = model_config.dtype else: @@ -121,6 +124,8 @@ def _swap( dst: List[KVCache], src_to_dst: Dict[int, int], ) -> None: + from vllm._C import cache_ops + with torch.cuda.stream(self.cache_stream): for i in range(self.num_layers): src_key_cache, src_value_cache = src[i] @@ -140,6 +145,8 @@ def swap_out(self, src_to_dst: Dict[int, int]) -> None: self._swap(self.gpu_cache, self.cpu_cache, src_to_dst) def copy(self, src_to_dsts: Dict[int, List[int]]) -> None: + from vllm._C import cache_ops + key_caches = [key_cache for key_cache, _ in self.gpu_cache] value_caches = [value_cache for _, value_cache in self.gpu_cache] # NOTE(woosuk): This operation implicitly synchronizes the CPU and GPU. 
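The loader indirection added in vllm/model_executor/utils.py above keys the loader module off device_config.device_type and imports it lazily, so the Neuron path never pulls in CUDA-only code. Below is a minimal standalone sketch of that dispatch pattern (the module names, package path, and load_for_device helper are illustrative stand-ins, not the vLLM API), assuming each loader module exposes a get_model callable:

    """Minimal sketch of a device-keyed loader dispatch (illustrative only)."""
    import importlib

    # Hypothetical mapping from device type to loader module name.
    _LOADER_MODULES = {
        "cuda": "cuda_loader",
        "neuron": "neuron_loader",
    }

    def load_for_device(device_type: str, package: str = "my_project.loaders"):
        """Return the get_model() function of the loader module for device_type."""
        try:
            module_name = _LOADER_MODULES[device_type]
        except KeyError:
            raise ValueError(f"Unsupported device type: {device_type}") from None
        # Import happens only for the selected backend, keeping other backends optional.
        module = importlib.import_module(f"{package}.{module_name}")
        # Each loader module is assumed to expose a get_model(...) function.
        return module.get_model

    # Hypothetical usage: get_model = load_for_device("neuron"); model = get_model(model_config)
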
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index b99a409e02d1e..efe570778fb43 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -80,9 +80,16 @@ def __init__( self.in_wsl = in_wsl() self.kv_cache_dtype = kv_cache_dtype + # Set enforce_eager to True for Neuron backend, to avoid capturing graph + if self.device_config.is_neuron: + self.model_config.enforce_eager = True + def load_model(self) -> None: - self.model = get_model(self.model_config, self.device_config, - self.lora_config) + self.model = get_model(self.model_config, + self.device_config, + lora_config=self.lora_config, + parallel_config=self.parallel_config, + scheduler_config=self.scheduler_config) vocab_size = self.model.config.vocab_size @@ -393,6 +400,7 @@ def _prepare_sample( selected_token_start_idx = 0 categorized_sample_indices = {t: [] for t in SamplingType} categorized_sample_indices_start_idx = 0 + pin_memory = not self.in_wsl and not self.device_config.is_neuron max_subquery_len = max(subquery_lens) if subquery_lens else 1 for i, seq_group_metadata in enumerate(seq_group_metadata_list): @@ -443,12 +451,12 @@ def _prepare_sample( selected_token_indices = _async_h2d(selected_token_indices, dtype=torch.long, target_device=self.device, - pin_memory=not self.in_wsl) + pin_memory=pin_memory) categorized_sample_indices = { t: _async_h2d(seq_ids, dtype=torch.int, target_device=self.device, - pin_memory=not self.in_wsl) + pin_memory=pin_memory) for t, seq_ids in categorized_sample_indices.items() } diff --git a/vllm/worker/neuron_worker.py b/vllm/worker/neuron_worker.py new file mode 100644 index 0000000000000..3229a21c11a38 --- /dev/null +++ b/vllm/worker/neuron_worker.py @@ -0,0 +1,191 @@ +"""A Neuron worker class.""" +from typing import Dict, List, Optional, Tuple + +import torch +import torch.distributed + +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.model_executor import set_random_seed +from vllm.model_executor.parallel_utils.communication_op import ( + broadcast_tensor_dict) +from vllm.model_executor.parallel_utils.parallel_state import ( + ensure_model_parallel_initialized) +from vllm.sequence import SamplerOutput, SequenceGroupMetadata +from vllm.worker.cache_engine import CacheEngine +from vllm.worker.model_runner import ModelRunner + + +class Worker: + """A worker class that executes the model on a group of neuron cores. + """ + + def __init__( + self, + model_config: ModelConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + local_rank: int, + rank: int, + distributed_init_method: str, + lora_config: Optional[LoRAConfig] = None, + kv_cache_dtype: Optional[str] = "auto", + is_driver_worker: bool = False, + ) -> None: + self.model_config = model_config + self.parallel_config = parallel_config + self.scheduler_config = scheduler_config + self.device_config = device_config + self.local_rank = local_rank + self.rank = rank + self.distributed_init_method = distributed_init_method + self.lora_config = lora_config + self.is_driver_worker = is_driver_worker + if self.is_driver_worker: + assert self.rank == 0, "The driver worker must have rank 0." + + self.model_runner = ModelRunner(model_config, + parallel_config, + scheduler_config, + device_config, + lora_config=self.lora_config, + is_driver_worker=is_driver_worker) + # Uninitialized cache engine. Will be initialized by + # self.init_cache_engine(). 
+ self.cache_config = None + self.cache_engine = None + self.cache_events = None + self.gpu_cache = None + + def init_model(self) -> None: + # Initialize the distributed environment. + _init_distributed_environment(self.parallel_config, + self.rank, + self.distributed_init_method, + distributed_backend="gloo") + + # Initialize the model. + set_random_seed(self.model_config.seed) + + def load_model(self): + self.model_runner.load_model() + + @torch.inference_mode() + def profile_num_available_blocks( + self, + block_size: int = 128, + gpu_memory_utilization: float = 0.9, + cpu_swap_space: int = 0, + cache_dtype: str = "float16", + ) -> Tuple[int, int]: + """Simply returns max_num_seqs as num_gpu_blocks, 0 as num_cpu_blocks.""" + num_gpu_blocks = self.scheduler_config.max_num_seqs + num_cpu_blocks = 0 + return num_gpu_blocks, num_cpu_blocks + + def init_cache_engine(self, cache_config: CacheConfig) -> None: + self.cache_config = cache_config + self.cache_engine = CacheEngine(self.cache_config, self.model_config, + self.parallel_config) + self.model_runner.set_block_size(self.cache_engine.block_size) + + def warm_up_model(self) -> None: + # Warm up is maintained in transformers-neuronx + pass + + def cache_swap( + self, + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + ) -> None: + # Issue cache operations. + issued_cache_op = False + if blocks_to_swap_in: + self.cache_engine.swap_in(blocks_to_swap_in) + issued_cache_op = True + if blocks_to_swap_out: + self.cache_engine.swap_out(blocks_to_swap_out) + issued_cache_op = True + if blocks_to_copy: + self.cache_engine.copy(blocks_to_copy) + issued_cache_op = True + + cache_events = self.cache_events if issued_cache_op else None + + # Wait for cache operations to finish. + if cache_events is not None: + raise NotImplementedError( + "cache operations are not implemented for neuron backend.") + + @torch.inference_mode() + def execute_model( + self, + seq_group_metadata_list: Optional[List[SequenceGroupMetadata]] = None, + blocks_to_swap_in: Optional[Dict[int, int]] = None, + blocks_to_swap_out: Optional[Dict[int, int]] = None, + blocks_to_copy: Optional[Dict[int, List[int]]] = None, + ) -> Optional[SamplerOutput]: + if self.is_driver_worker: + assert seq_group_metadata_list is not None + num_seq_groups = len(seq_group_metadata_list) + assert blocks_to_swap_in is not None + assert blocks_to_swap_out is not None + assert blocks_to_copy is not None + data = { + "num_seq_groups": num_seq_groups, + "blocks_to_swap_in": blocks_to_swap_in, + "blocks_to_swap_out": blocks_to_swap_out, + "blocks_to_copy": blocks_to_copy, + } + broadcast_tensor_dict(data, src=0) + else: + data = broadcast_tensor_dict(src=0) + num_seq_groups = data["num_seq_groups"] + blocks_to_swap_in = data["blocks_to_swap_in"] + blocks_to_swap_out = data["blocks_to_swap_out"] + blocks_to_copy = data["blocks_to_copy"] + + self.cache_swap(blocks_to_swap_in, blocks_to_swap_out, blocks_to_copy) + + # If there is no input, we don't need to execute the model. 
+ if num_seq_groups == 0: + return {} + + output = self.model_runner.execute_model(seq_group_metadata_list, + self.gpu_cache) + return output + + +def _init_distributed_environment( + parallel_config: ParallelConfig, + rank: int, + distributed_init_method: Optional[str] = None, + distributed_backend: Optional[str] = None, +) -> None: + """Initialize the distributed environment.""" + if torch.distributed.is_initialized(): + torch_world_size = torch.distributed.get_world_size() + if torch_world_size != parallel_config.world_size: + raise RuntimeError( + "torch.distributed is already initialized but the torch world " + "size does not match parallel_config.world_size " + f"({torch_world_size} vs. {parallel_config.world_size}).") + elif not distributed_init_method: + raise ValueError( + "distributed_init_method must be set if torch.distributed " + "is not already initialized") + else: + distributed_backend = distributed_backend if distributed_backend else "nccl" + torch.distributed.init_process_group( + backend=distributed_backend, + world_size=parallel_config.world_size, + rank=rank, + init_method=distributed_init_method, + ) + + # A small all_reduce for warmup. + torch.distributed.all_reduce(torch.zeros(1)) + ensure_model_parallel_initialized(parallel_config.tensor_parallel_size, + parallel_config.pipeline_parallel_size) From 929b4f2973ec6a53ea4f0f03d21147ef8b8278be Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 28 Feb 2024 13:03:28 -0800 Subject: [PATCH 028/196] Add LoRA support for Gemma (#3050) --- .buildkite/test-pipeline.yaml | 2 +- csrc/punica/bgmv/bgmv_config.h | 2 ++ tests/lora/conftest.py | 5 ++++ tests/lora/test_gemma.py | 46 +++++++++++++++++++++++++++++ tests/lora/test_punica.py | 4 +-- vllm/model_executor/models/gemma.py | 28 ++++++++++++++++-- vllm/model_executor/models/llama.py | 2 +- 7 files changed, 82 insertions(+), 7 deletions(-) create mode 100644 tests/lora/test_gemma.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index efcc4d2d07a12..c65ab04b8ddda 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -50,7 +50,7 @@ steps: command: pytest -v -s worker - label: LoRA Test - command: pytest -v -s lora + command: pytest -v -s lora --forked - label: Metrics Test command: pytest -v -s metrics diff --git a/csrc/punica/bgmv/bgmv_config.h b/csrc/punica/bgmv/bgmv_config.h index ebf638f104c3f..d5fee9c40d00c 100644 --- a/csrc/punica/bgmv/bgmv_config.h +++ b/csrc/punica/bgmv/bgmv_config.h @@ -28,6 +28,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 5120) \ f(in_T, out_T, W_T, narrow, 5504) \ f(in_T, out_T, W_T, narrow, 5632) \ + f(in_T, out_T, W_T, narrow, 6144) \ f(in_T, out_T, W_T, narrow, 6912) \ f(in_T, out_T, W_T, narrow, 7168) \ f(in_T, out_T, W_T, narrow, 8192) \ @@ -39,6 +40,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 14336) \ f(in_T, out_T, W_T, narrow, 16384) \ f(in_T, out_T, W_T, narrow, 20480) \ + f(in_T, out_T, W_T, narrow, 24576) \ f(in_T, out_T, W_T, narrow, 28672) \ f(in_T, out_T, W_T, narrow, 32000) \ f(in_T, out_T, W_T, narrow, 32256) \ diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py index 75f4e41290c36..67273144ecd02 100644 --- a/tests/lora/conftest.py +++ b/tests/lora/conftest.py @@ -126,6 +126,11 @@ def mixtral_lora_files(): return snapshot_download(repo_id="terrysun/mixtral-lora-adapter") +@pytest.fixture(scope="session") +def gemma_lora_files(): + return 
snapshot_download(repo_id="wskwon/gemma-7b-test-lora") + + @pytest.fixture def llama_2_7b_engine_extra_embeddings() -> nn.Module: cleanup() diff --git a/tests/lora/test_gemma.py b/tests/lora/test_gemma.py new file mode 100644 index 0000000000000..0082c6e74e888 --- /dev/null +++ b/tests/lora/test_gemma.py @@ -0,0 +1,46 @@ +import vllm +from vllm.lora.request import LoRARequest + +MODEL_PATH = "google/gemma-7b" + + +def do_sample(llm, lora_path: str, lora_id: int) -> str: + prompts = [ + "Quote: Imagination is", + "Quote: Be yourself;", + "Quote: So many books,", + ] + sampling_params = vllm.SamplingParams(temperature=0, max_tokens=32) + outputs = llm.generate( + prompts, + sampling_params, + lora_request=LoRARequest(str(lora_id), lora_id, lora_path) + if lora_id else None) + # Print the outputs. + generated_texts = [] + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text.strip() + generated_texts.append(generated_text) + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + return generated_texts + + +def test_gemma_lora(gemma_lora_files): + llm = vllm.LLM(MODEL_PATH, + max_model_len=1024, + enable_lora=True, + max_loras=4) + + expected_lora_output = [ + "more important than knowledge.\nAuthor: Albert Einstein\n", + "everyone else is already taken.\nAuthor: Oscar Wilde\n", + "so little time\nAuthor: Frank Zappa\n", + ] + + output1 = do_sample(llm, gemma_lora_files, lora_id=1) + for i in range(len(expected_lora_output)): + assert output1[i].startswith(expected_lora_output[i]) + output2 = do_sample(llm, gemma_lora_files, lora_id=2) + for i in range(len(expected_lora_output)): + assert output2[i].startswith(expected_lora_output[i]) diff --git a/tests/lora/test_punica.py b/tests/lora/test_punica.py index 903814faa5dc7..cbe0f6fa2e851 100644 --- a/tests/lora/test_punica.py +++ b/tests/lora/test_punica.py @@ -44,8 +44,8 @@ def _lora_ref_impl( H1 = H2 = [ 128, 256, 512, 1024, 1280, 2048, 2560, 2752, 3072, 3456, 3584, 4096, 5120, - 5504, 5632, 6912, 7168, 8192, 9216, 10240, 11008, 13824, 14336, 32000, - 32256, 32512, 32768, 33024 + 5504, 5632, 6144, 6912, 7168, 8192, 9216, 10240, 11008, 13824, 14336, + 24576, 32000, 32256, 32512, 32768, 33024 ] SEED = [0xabcdabcd987] diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index d8b515993d8ff..03948132d32c3 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -20,6 +20,7 @@ from torch import nn from transformers import GemmaConfig +from vllm.config import LoRAConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import GeluAndMul from vllm.model_executor.layers.attention import PagedAttention @@ -246,12 +247,36 @@ def forward( class GemmaForCausalLM(nn.Module): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + # LoRA specific attributes + supported_lora_modules = [ + "qkv_proj", + "o_proj", + "gate_up_proj", + "down_proj", + ] + # Gemma does not apply LoRA to the embedding layer. + embedding_modules = {} + embedding_padding_modules = [] def __init__( self, config: GemmaConfig, linear_method: Optional[LinearMethodBase] = None, + lora_config: Optional[LoRAConfig] = None, ) -> None: + del lora_config # Unused. 
super().__init__() self.config = config self.linear_method = linear_method @@ -305,9 +330,6 @@ def load_weights(self, weight_loader(param, loaded_weight, shard_id) break else: - # Skip loading extra layer for lora models. - if "lm_head" in name: - continue # GemmaRMSNorm is different from Llama's in that it multiplies # (1 + weight) to the output, instead of just weight. if "norm.weight" in name: diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index b7f6b8f3ec374..d35887cc0f6a3 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -27,6 +27,7 @@ from torch import nn from transformers import LlamaConfig +from vllm.config import LoRAConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.attention import PagedAttention @@ -45,7 +46,6 @@ from vllm.model_executor.weight_utils import (default_weight_loader, hf_model_weights_iterator) from vllm.sequence import SamplerOutput -from vllm.config import LoRAConfig KVCache = Tuple[torch.Tensor, torch.Tensor] From 01a5d18a537b65a156cfa1a77706693a24c869c1 Mon Sep 17 00:00:00 2001 From: CHU Tianxiang Date: Thu, 29 Feb 2024 13:52:23 +0800 Subject: [PATCH 029/196] Add Support for 2/3/8-bit GPTQ Quantization Models (#2330) --- csrc/ops.h | 6 +- csrc/quantization/gptq/matrix_view.cuh | 123 ++ csrc/quantization/gptq/q_gemm.cu | 1452 +++++++++++++++-- csrc/quantization/gptq/qdq_2.cuh | 87 + csrc/quantization/gptq/qdq_3.cuh | 141 ++ csrc/quantization/gptq/qdq_4.cuh | 100 +- csrc/quantization/gptq/qdq_8.cuh | 40 + .../layers/quantization/gptq.py | 16 +- 8 files changed, 1736 insertions(+), 229 deletions(-) create mode 100644 csrc/quantization/gptq/qdq_2.cuh create mode 100644 csrc/quantization/gptq/qdq_3.cuh create mode 100644 csrc/quantization/gptq/qdq_8.cuh diff --git a/csrc/ops.h b/csrc/ops.h index dbdd2c2c57945..08dfb0e8604f1 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -98,11 +98,13 @@ torch::Tensor gptq_gemm( torch::Tensor b_gptq_qzeros, torch::Tensor b_gptq_scales, torch::Tensor b_g_idx, - bool use_exllama); + bool use_exllama, + int bit); void gptq_shuffle( torch::Tensor q_weight, - torch::Tensor q_perm); + torch::Tensor q_perm, + int bit); void moe_align_block_size( torch::Tensor topk_ids, diff --git a/csrc/quantization/gptq/matrix_view.cuh b/csrc/quantization/gptq/matrix_view.cuh index 1fdf019b29028..eda3436eb5375 100644 --- a/csrc/quantization/gptq/matrix_view.cuh +++ b/csrc/quantization/gptq/matrix_view.cuh @@ -146,6 +146,129 @@ public: __device__ __forceinline__ const uint32_t* item_uint32_ptr(int row, int column) { return &data[row / 8 * width + column]; } }; +class MatrixView_q2_row +{ +public: + const uint32_t* data; + const int height; + const int width; + + __device__ __forceinline__ MatrixView_q2_row(const uint32_t* data, const int height, const int width) + : data(data), height(height), width(width) + { } + + __device__ __forceinline__ int item(int row, int column) const + { + int shift = (column & 0x0f) * 2; + return (data[row * width / 16 + column / 16] >> shift) & 0x03; + } + + __device__ __forceinline__ void item2(int (&items)[2], int row, int column) const + { + int shift = (column & 0x0f) * 2; + uint32_t d = data[row * width / 16 + column / 16] >> shift; + items[0] = d & 0x03; + items[1] = (d >> 2) & 0x03; + } + + __device__ __forceinline__ void item4(int (&items)[4], int row, int column) const + { + int shift = (column & 0x0f) * 2; + uint32_t d = data[row 
* width / 16 + column / 16] >> shift; + items[0] = d & 0x03; + items[1] = (d >> 2) & 0x03; + items[2] = (d >> 4) & 0x03; + items[3] = (d >> 6) & 0x03; + } +}; + +class MatrixView_q3_row +{ +public: + const uint32_t* data; + const int height; + const int width; + + __device__ __forceinline__ MatrixView_q3_row(const uint32_t* data, const int height, const int width) + : data(data), height(height), width(width) + { } + + __device__ __forceinline__ int item(int row, int column) const + { + int z_w = column * 3 / 32; + int z_mod = column & 0x1f; + + if (z_mod == 10) { + return (data[row * width * 3 / 32 + z_w] >> 30) | ((data[row * width * 3 / 32 + (z_w + 1)] << 2) & 0x4); + } else if (z_mod == 21) { + return (data[row * width * 3 / 32 + z_w] >> 31) | ((data[row * width * 3 / 32 + (z_w + 1)] << 1) & 0x6); + } else if (z_mod < 10) { + return (data[row * width * 3 / 32 + z_w] >> (z_mod * 3)) & 0x07; + } else if (z_mod < 21) { + return (data[row * width * 3 / 32 + z_w] >> (z_mod * 3 - 32)) & 0x07; + } else { + return (data[row * width * 3 / 32 + z_w] >> (z_mod * 3 - 64)) & 0x07; + } + } + + __device__ __forceinline__ void item4(int (&items)[4], int row, int column) const + { + int shift = (column & 0x1f); + uint32_t d; + if (shift <= 4) { + d = data[row * width / 32 * 3 + column * 3 / 32] >> (shift * 3); + } else if (shift == 8) { + d = (data[row * width / 32 * 3 + column * 3 / 32] >> 24) | ((data[row * width / 32 * 3 + column * 3 / 32 + 1] & 0x0f) << 8); + } else if (shift <= 16) { + d = data[row * width / 32 * 3 + column * 3 / 32] >> (shift * 3 - 32); + } else if (shift == 20) { + d = (data[row * width / 32 * 3 + column * 3 / 32] >> 28) | ((data[row * width / 32 * 3 + column * 3 / 32 + 1] & 0xff) << 4); + } else { + d = data[row * width / 32 * 3 + column * 3 / 32] >> (shift * 3 - 64); + } + items[0] = d & 0x07; + items[1] = (d >> 3) & 0x07; + items[2] = (d >> 6) & 0x07; + items[3] = (d >> 9) & 0x07; + } +}; + +class MatrixView_q8_row +{ +public: + const uint32_t* data; + const int height; + const int width; + + __device__ __forceinline__ MatrixView_q8_row(const uint32_t* data, const int height, const int width) + : data(data), height(height), width(width) + { } + + __device__ __forceinline__ int item(int row, int column) const + { + int shift = (column & 0x03) * 8; + return (data[row * width / 4 + column / 4] >> shift) & 0xff; + } + + __device__ __forceinline__ void item2(int (&items)[2], int row, int column) const + { + int shift = (column & 0x03) * 8; + uint32_t d = data[row * width / 4 + column / 4] >> shift; + items[0] = d & 0xff; + items[1] = (d >> 8) & 0xff; + } + + __device__ __forceinline__ void item4(int (&items)[4], int row, int column) const + { + int shift = (column & 0x03) * 2; + uint32_t d = data[row * width / 4 + column / 4] >> shift; + items[0] = d & 0xff; + items[1] = (d >> 8) & 0xff; + items[2] = (d >> 16) & 0xff; + items[3] = (d >> 24) & 0xff; + } +}; + } // namespace gptq } // namespace vllm #endif diff --git a/csrc/quantization/gptq/q_gemm.cu b/csrc/quantization/gptq/q_gemm.cu index a5d2345f1e7fd..655158e38f557 100644 --- a/csrc/quantization/gptq/q_gemm.cu +++ b/csrc/quantization/gptq/q_gemm.cu @@ -13,7 +13,10 @@ Adapted from https://github.com/turboderp/exllamav2 and https://github.com/qwopq #include "compat.cuh" #include "matrix_view.cuh" +#include "qdq_2.cuh" +#include "qdq_3.cuh" #include "qdq_4.cuh" +#include "qdq_8.cuh" namespace vllm { namespace gptq { @@ -22,6 +25,7 @@ namespace gptq { #define BLOCK_M_SIZE_MAX 8 #define MAX_GROUPS_IN_BLOCK (BLOCK_KN_SIZE / 32) 
#define MAX_Q_GEMM_ROWS 50 +#define MAX_Q_GEMM_ROWS_8BIT 24 #define MAX_ALT_GEMM_ROWS 8 #define THREADS_X 32 #define THREADS_Y 32 @@ -75,6 +79,106 @@ __forceinline__ __device__ float dot22_8_f(half2(&dq)[4], const half* a_ptr) return __half2float(__low2half(result)) + __half2float(__high2half(result)); } +__forceinline__ __device__ half2 dot22_8(half2(&dq)[4], const half* a_ptr, const half2 g_result, const half qs_h) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 4; i++) result = __hfma2(dq[i], *a2_ptr++, result); + return __hfma2(result, __halves2half2(qs_h, qs_h), g_result); +} + +__forceinline__ __device__ half2 dot22_16(half2(&dq)[8], const half* a_ptr, const half2 g_result, const half qs_h) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 8; i++) result = __hfma2(dq[i], *a2_ptr++, result); + return __hfma2(result, __halves2half2(qs_h, qs_h), g_result); +} + +__forceinline__ __device__ half2 dot22_32(half2(&dq)[16], const half* a_ptr, const half2 g_result, const half qs_h) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 16; i += 1) result = __hfma2(dq[i], *a2_ptr++, result); + return __hfma2(result, __halves2half2(qs_h, qs_h), g_result); +} + +__forceinline__ __device__ float dot22_8_f(half2(&dq)[4], const half* a_ptr, const float g_result, const float qs_f) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 4; i++) result = __hfma2(dq[i], *a2_ptr++, result); + float result_f = __half2float(__low2half(result)) + __half2float(__high2half(result)); + return fma(result_f, qs_f, g_result); +} + +__forceinline__ __device__ float dot22_16_f(half2(&dq)[8], const half* a_ptr, const float g_result, const float qs_f) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 8; i++) result = __hfma2(dq[i], *a2_ptr++, result); + float result_f = __half2float(__low2half(result)) + __half2float(__high2half(result)); + return fma(result_f, qs_f, g_result); +} + +__forceinline__ __device__ float dot22_32_f(half2(&dq)[16], const half* a_ptr, const float g_result, const float qs_f) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 16; i += 1) result = __hfma2(dq[i], *a2_ptr++, result); + float result_f = __half2float(__low2half(result)) + __half2float(__high2half(result)); + return fma(result_f, qs_f, g_result); +} + +__forceinline__ __device__ half dot22_8_h(half2(&dq)[4], const half* a_ptr, const half g_result, const half qs_h) +{ + // Use FP32 accumulator to avoid potential overflow since unscaled weights are in the range -128..127 + + float result = {}; + #pragma unroll + for (int i = 0; i < 4; i++) + { + half2 w01 = dq[i]; + float w0 = __low2float(w01); + float w1 = __high2float(w01); + float x0 = __half2float(*a_ptr++); + float x1 = __half2float(*a_ptr++); + result = fma(w0, x0, result); + result = fma(w1, x1, result); + } + float qs = __half2float(qs_h); + result *= qs; + half result_h = __float2half_rn(result); + return __hadd(result_h, g_result); +} + +__forceinline__ __device__ half dot22_16_h(half2(&dq)[8], const half* a_ptr, const half g_result, const half qs_h) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 8; i++) result = __hfma2(dq[i], *a2_ptr++, result); + half result_h = 
__hadd(__low2half(result), __high2half(result)); + return __hfma(result_h, qs_h, g_result); +} + +__forceinline__ __device__ half dot22_32_h(half2(&dq)[16], const half* a_ptr, const half g_result, const half qs_h) +{ + half2 result = {}; + const half2* a2_ptr = (const half2*)a_ptr; + #pragma unroll + for (int i = 0; i < 16; i += 1) result = __hfma2(dq[i], *a2_ptr++, result); + half result_h = __hadd(__low2half(result), __high2half(result)); + return __hfma(result_h, qs_h, g_result); +} + + typedef void (*fp_gemm_half_q_half_gptq_kernel) ( const half*, @@ -89,8 +193,9 @@ typedef void (*fp_gemm_half_q_half_gptq_kernel) const int* ); + template -__global__ void gemm_half_q_half_gptq_kernel +__global__ void gemm_half_q_half_gptq_4bit_kernel ( const half* __restrict__ a, const uint32_t* __restrict__ b_q_weight, @@ -231,80 +336,794 @@ __global__ void gemm_half_q_half_gptq_kernel } } - -fp_gemm_half_q_half_gptq_kernel pick_gemm_half_q_half_gptq_kernel(bool first_block, const int m_count) +template +__global__ void gemm_half_q_half_gptq_2bit_kernel +( + const half* __restrict__ a, + const uint32_t* __restrict__ b_q_weight, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + half* __restrict__ c, + const int size_m, + const int size_n, + const int size_k, + const int groups, + const int* __restrict__ b_q_perm +) { - #if BLOCK_M_SIZE_MAX >= 1 - if (m_count == 1) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 2 - if (m_count == 2) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 3 - if (m_count == 3) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 4 - if (m_count == 4) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 5 - if (m_count == 5) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 6 - if (m_count == 6) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 7 - if (m_count == 7) return gemm_half_q_half_gptq_kernel; - #endif - #if BLOCK_M_SIZE_MAX >= 8 - if (m_count == 8) return gemm_half_q_half_gptq_kernel; - #endif - return NULL; -} + MatrixView_half a_(a, size_m, size_k); + MatrixView_half_rw c_(c, size_m, size_n); + MatrixView_q2_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); + int t = threadIdx.x; -void gemm_half_q_half_cuda_part + // Block + int offset_n = blockIdx.x * BLOCK_KN_SIZE * 4; + int offset_m = blockIdx.y * m_count; + int offset_k = blockIdx.z * BLOCK_KN_SIZE; + + int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n); + int end_m = min(offset_m + m_count, size_m); + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + int n = offset_n + t * 4; + + // Preload block_a + __shared__ half block_a[m_count][BLOCK_KN_SIZE]; + + if (offset_k + t < end_k) + { + for (int m = 0; m < m_count; ++m) + { + const half* a_ptr = a_.item_ptr(offset_m + m, 0); + half* block_a_ptr = block_a[m]; + + half a0; + if (b_q_perm) a0 = a_ptr[b_q_perm[offset_k + t]]; + else a0 = a_ptr[offset_k + t]; + block_a_ptr[t] = a0; + } + } + + // Zero output + if (n >= size_n) return; + + if (blockIdx.z == 0) + { + for (int m = 0; m < m_count; m++) + *((uint64_t*)c_.item_ptr(offset_m + m, n)) = 0; + } + + __syncthreads(); + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // a, b offset + int qk = offset_k / (32 / 2); + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + const half* 
a_ptr = &block_a[0][0]; + int a_stride = BLOCK_KN_SIZE; + + // Initial group + int zeros[4]; + half scales[4]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + // Column result + half block_c[m_count][4] = {}; + + // Dequantize and multiply + int k = offset_k; + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + } + + #pragma unroll + for (int j = 0; j < 1; j++) + { + const int4* b_ptr4 = (int4*) b_ptr; + int4 load_int4 = *b_ptr4; + + half2 dq[4][8]; + dequant_2bit_16(load_int4.x, dq[0], size_n, zeros[0] + 1); + dequant_2bit_16(load_int4.y, dq[1], size_n, zeros[1] + 1); + dequant_2bit_16(load_int4.z, dq[2], size_n, zeros[2] + 1); + dequant_2bit_16(load_int4.w, dq[3], size_n, zeros[3] + 1); + + #pragma unroll + for (int m = 0; m < m_count; m++) + { + block_c[m][0] = dot22_16_h(dq[0], a_ptr + m * a_stride, block_c[m][0], scales[0]); + block_c[m][1] = dot22_16_h(dq[1], a_ptr + m * a_stride, block_c[m][1], scales[1]); + block_c[m][2] = dot22_16_h(dq[2], a_ptr + m * a_stride, block_c[m][2], scales[2]); + block_c[m][3] = dot22_16_h(dq[3], a_ptr + m * a_stride, block_c[m][3], scales[3]); + } + + b_ptr += size_n; + a_ptr += 16; + } + + k += 16; + } + + for (int m = 0; m < m_count; m++) + { + half2 *out = (half2*) c_.item_ptr(offset_m + m, n); + half2 result01 = __halves2half2(block_c[m][0], block_c[m][1]); + half2 result23 = __halves2half2(block_c[m][2], block_c[m][3]); + atomicAdd(out , result01); + atomicAdd(out + 1, result23); + } +} + +template +__global__ void gemm_half_q_half_gptq_3bit_kernel ( - const half* a, - const uint32_t* b_q_weight, - const uint32_t* b_gptq_qzeros, - const half* b_gptq_scales, - const int* b_q_perm, - half* c, - int size_m, - int size_n, - int size_k, - int m_count, - int groups + const half* __restrict__ a, + const uint32_t* __restrict__ b_q_weight, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + half* __restrict__ c, + const int size_m, + const int size_n, + const int size_k, + const int groups, + const int* __restrict__ b_q_perm ) { - dim3 blockDim, gridDim; - blockDim.x = BLOCK_KN_SIZE; - blockDim.y = 1; - blockDim.z = 1; - gridDim.x = DIVIDE(size_n, BLOCK_KN_SIZE * 4); - gridDim.y = DIVIDE(size_m, m_count); - gridDim.z = DIVIDE(size_k, BLOCK_KN_SIZE); + MatrixView_half a_(a, size_m, size_k); + MatrixView_half_rw c_(c, size_m, size_n); + MatrixView_q3_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); - fp_gemm_half_q_half_gptq_kernel kernel = pick_gemm_half_q_half_gptq_kernel(true, m_count); + int t = threadIdx.x; - const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - kernel<<>> - ( - a, - b_q_weight, - b_gptq_qzeros, - b_gptq_scales, - c, - size_m, - size_n, - size_k, - groups, - b_q_perm - ); -} + // Block + int offset_n = blockIdx.x * BLOCK_KN_SIZE * 4; + int offset_m = blockIdx.y * m_count; + int offset_k = blockIdx.z * BLOCK_KN_SIZE; + + int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n); + int end_m = min(offset_m + m_count, size_m); + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + int n = offset_n + t * 4; + + // Preload block_a + __shared__ half block_a[m_count][BLOCK_KN_SIZE]; + + if (offset_k + t < end_k) + { + for (int m = 0; m < m_count; ++m) + { + const half* a_ptr = a_.item_ptr(offset_m + m, 0); + half* block_a_ptr = block_a[m]; + half a0; + if (b_q_perm) a0 = 
a_ptr[b_q_perm[offset_k + t]]; + else a0 = a_ptr[offset_k + t]; + block_a_ptr[t] = a0; + } + } + + // Zero output + if (n >= size_n) return; + + if (blockIdx.z == 0) + { + for (int m = 0; m < m_count; m++) + *((uint64_t*)c_.item_ptr(offset_m + m, n)) = 0; + } + + __syncthreads(); + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // a, b offset + int qk = offset_k / 32 * 3; + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + const half* a_ptr = &block_a[0][0]; + int a_stride = BLOCK_KN_SIZE; + + // Initial group + int zeros[4]; + half scales[4]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + // Column result + half block_c[m_count][4] = {}; + + // Dequantize and multiply + int k = offset_k; + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + } + + #pragma unroll + for (int j = 0; j < 1; j++) + { + int4 load_int4[3]; + load_int4[0] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[1] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[2] = *((int4*) b_ptr); b_ptr += size_n; + + half2 dq[4][16]; + dequant_3bit_32(load_int4[0].x, load_int4[1].x, load_int4[2].x, dq[0], size_n, zeros[0] + 1); + dequant_3bit_32(load_int4[0].y, load_int4[1].y, load_int4[2].y, dq[1], size_n, zeros[1] + 1); + dequant_3bit_32(load_int4[0].z, load_int4[1].z, load_int4[2].z, dq[2], size_n, zeros[2] + 1); + dequant_3bit_32(load_int4[0].w, load_int4[1].w, load_int4[2].w, dq[3], size_n, zeros[3] + 1); + + #pragma unroll + for (int m = 0; m < m_count; m++) + { + block_c[m][0] = dot22_32_h(dq[0], a_ptr + m * a_stride, block_c[m][0], scales[0]); + block_c[m][1] = dot22_32_h(dq[1], a_ptr + m * a_stride, block_c[m][1], scales[1]); + block_c[m][2] = dot22_32_h(dq[2], a_ptr + m * a_stride, block_c[m][2], scales[2]); + block_c[m][3] = dot22_32_h(dq[3], a_ptr + m * a_stride, block_c[m][3], scales[3]); + } + a_ptr += 32; + } + + k += 32; + } + + for (int m = 0; m < m_count; m++) + { + half2 *out = (half2*) c_.item_ptr(offset_m + m, n); + half2 result01 = __halves2half2(block_c[m][0], block_c[m][1]); + half2 result23 = __halves2half2(block_c[m][2], block_c[m][3]); + atomicAdd(out , result01); + atomicAdd(out + 1, result23); + } +} + +template +__global__ void gemm_half_q_half_gptq_8bit_kernel +( + const half* __restrict__ a, + const uint32_t* __restrict__ b_q_weight, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + half* __restrict__ c, + const int size_m, + const int size_n, + const int size_k, + const int groups, + const int* __restrict__ b_q_perm +) +{ + MatrixView_half a_(a, size_m, size_k); + MatrixView_half_rw c_(c, size_m, size_n); + MatrixView_q8_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); + + int t = threadIdx.x; + + // Block + int offset_n = blockIdx.x * BLOCK_KN_SIZE * 4; + int offset_m = blockIdx.y * m_count; + int offset_k = blockIdx.z * BLOCK_KN_SIZE; + + int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n); + int end_m = min(offset_m + m_count, size_m); + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + int n = offset_n + t * 4; + + // Preload block_a + __shared__ half block_a[m_count][BLOCK_KN_SIZE]; + + if (offset_k + t < end_k) + { + for (int m = 0; m < m_count; ++m) + { + const half* a_ptr = a_.item_ptr(offset_m + m, 0); + half* block_a_ptr = 
block_a[m]; + + half a0; + if (b_q_perm) a0 = a_ptr[b_q_perm[offset_k + t]]; + else a0 = a_ptr[offset_k + t]; + block_a_ptr[t] = a0; + } + } + + // Zero output + if (n >= size_n) return; + + if (blockIdx.z == 0) + { + for (int m = 0; m < m_count; m++) + *((uint64_t*)c_.item_ptr(offset_m + m, n)) = 0; + } + + __syncthreads(); + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // a, b offset + int qk = offset_k / (32 / 8); + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + const half* a_ptr = &block_a[0][0]; + int a_stride = BLOCK_KN_SIZE; + + // Initial group + int zeros[4]; + half scales[4]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + // Column result + half block_c[m_count][4] = {}; + + // Dequantize and multiply + int k = offset_k; + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4(scales, group, n); + } + + #pragma unroll + for (int j = 0; j < 4; j++) + { + int4 load_int4[2]; + load_int4[0] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[1] = *((int4*) b_ptr); b_ptr += size_n; + + half2 dq[4][4]; + dequant_8bit_8(load_int4[0].x, load_int4[1].x, dq[0], size_n, zeros[0] + 1); + dequant_8bit_8(load_int4[0].y, load_int4[1].y, dq[1], size_n, zeros[1] + 1); + dequant_8bit_8(load_int4[0].z, load_int4[1].z, dq[2], size_n, zeros[2] + 1); + dequant_8bit_8(load_int4[0].w, load_int4[1].w, dq[3], size_n, zeros[3] + 1); + + for (int m = 0; m < m_count; m++) + { + block_c[m][0] = dot22_8_h(dq[0], a_ptr + m * a_stride, block_c[m][0], scales[0]); + block_c[m][1] = dot22_8_h(dq[1], a_ptr + m * a_stride, block_c[m][1], scales[1]); + block_c[m][2] = dot22_8_h(dq[2], a_ptr + m * a_stride, block_c[m][2], scales[2]); + block_c[m][3] = dot22_8_h(dq[3], a_ptr + m * a_stride, block_c[m][3], scales[3]); + } + a_ptr += 8; + } + k += 32; + } + + for (int m = 0; m < m_count; m++) + { + half2 *out = (half2*) c_.item_ptr(offset_m + m, n); + half2 result01 = __halves2half2(block_c[m][0], block_c[m][1]); + half2 result23 = __halves2half2(block_c[m][2], block_c[m][3]); + atomicAdd(out , result01); + atomicAdd(out + 1, result23); + } +} + +fp_gemm_half_q_half_gptq_kernel pick_gemm_half_q_half_gptq_kernel( + bool first_block, const int m_count, const int bit) +{ + #define SELECT_KERNEL(M_COUNT) \ + if (m_count == M_COUNT) { \ + if (bit == 2) return gemm_half_q_half_gptq_2bit_kernel; \ + if (bit == 3) return gemm_half_q_half_gptq_3bit_kernel; \ + if (bit == 4) return gemm_half_q_half_gptq_4bit_kernel; \ + if (bit == 8) return gemm_half_q_half_gptq_8bit_kernel; \ + } + #if BLOCK_M_SIZE_MAX >= 1 + SELECT_KERNEL(1); + #endif + #if BLOCK_M_SIZE_MAX >= 2 + SELECT_KERNEL(2); + #endif + #if BLOCK_M_SIZE_MAX >= 3 + SELECT_KERNEL(3); + #endif + #if BLOCK_M_SIZE_MAX >= 4 + SELECT_KERNEL(4); + #endif + #if BLOCK_M_SIZE_MAX >= 5 + SELECT_KERNEL(5); + #endif + #if BLOCK_M_SIZE_MAX >= 6 + SELECT_KERNEL(6); + #endif + #if BLOCK_M_SIZE_MAX >= 7 + SELECT_KERNEL(7); + #endif + #if BLOCK_M_SIZE_MAX >= 8 + SELECT_KERNEL(8); + #endif + return NULL; +} + + +void gemm_half_q_half_cuda_part +( + const half* a, + const uint32_t* b_q_weight, + const uint32_t* b_gptq_qzeros, + const half* b_gptq_scales, + const int* b_q_perm, + half* c, + int size_m, + int size_n, + int size_k, + int m_count, + int groups, + int bit +) +{ + dim3 blockDim, gridDim; + blockDim.x = BLOCK_KN_SIZE; + blockDim.y = 1; + blockDim.z = 1; + 
gridDim.x = DIVIDE(size_n, BLOCK_KN_SIZE * 4); + gridDim.y = DIVIDE(size_m, m_count); + gridDim.z = DIVIDE(size_k, BLOCK_KN_SIZE); + + fp_gemm_half_q_half_gptq_kernel kernel = pick_gemm_half_q_half_gptq_kernel(true, m_count, bit); + + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + kernel<<>> + ( + a, + b_q_weight, + b_gptq_qzeros, + b_gptq_scales, + c, + size_m, + size_n, + size_k, + groups, + b_q_perm + ); +} + + +__global__ void reconstruct_exllama_8bit_kernel +( + const uint32_t* __restrict__ b_q_weight, + const int* __restrict__ b_q_perm, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + const int size_k, + const int size_n, + const int groups, + half* __restrict__ b +) +{ + MatrixView_half_rw b_(b, size_k, size_n); + MatrixView_q8_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); + + int offset_k = BLOCK_KN_SIZE * blockIdx.y; + int offset_n = BLOCK_KN_SIZE * blockIdx.x * 4; + + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + // Preload remapping table + __shared__ int perm[BLOCK_KN_SIZE]; + int t = threadIdx.x; + + if (b_q_perm) + { + if (offset_k + t < size_k) + perm[t] = b_q_perm[offset_k + t]; + } + + // Column + int n = offset_n + t * 4; + if (n >= size_n) return; + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // b offset + int qk = offset_k / (32 / 8); + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + + // Initial zeros/scale + int zeros[4]; + half2 scales[4]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + + __syncthreads(); + + int k = offset_k; + int lk = 0; + + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + } + + for (int p = 0; p < 4; p++) + { + int4 load_int4[2]; + load_int4[0] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[1] = *((int4*) b_ptr); b_ptr += size_n; + + half2 dq[4][4]; + dequant_8bit_8(load_int4[0].x, load_int4[1].x, dq[0], size_n, zeros[0] + 1); + dequant_8bit_8(load_int4[0].y, load_int4[1].y, dq[1], size_n, zeros[1] + 1); + dequant_8bit_8(load_int4[0].z, load_int4[1].z, dq[2], size_n, zeros[2] + 1); + dequant_8bit_8(load_int4[0].w, load_int4[1].w, dq[3], size_n, zeros[3] + 1); + + //half* dqh = (half*)dq; + if (b_q_perm) + { + for (int j = 0; j < 4; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(perm[lk++], n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(perm[lk++], n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + else + { + for (int j = 0; j < 4; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(offset_k + lk++, n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(offset_k + lk++, n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + } + k += 32; + } +} + +__global__ void reconstruct_exllama_4bit_kernel +( + const uint32_t* __restrict__ b_q_weight, + const int* __restrict__ b_q_perm, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + const int size_k, + const int size_n, + const int groups, + half* __restrict__ b +) +{ + 
MatrixView_half_rw b_(b, size_k, size_n); + MatrixView_q4_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); + + int offset_k = BLOCK_KN_SIZE * blockIdx.y; + int offset_n = BLOCK_KN_SIZE * blockIdx.x * 4; + + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + // Preload remapping table + __shared__ int perm[BLOCK_KN_SIZE]; + int t = threadIdx.x; + + if (b_q_perm) + { + if (offset_k + t < size_k) + perm[t] = b_q_perm[offset_k + t]; + } + + // Column + int n = offset_n + t * 4; + if (n >= size_n) return; + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // b offset + int qk = offset_k / (32 / 4); + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + + // Initial zeros/scale + int zeros[4]; + half2 scales[4]; + half2 z1z16[4][2]; + half2 y1y16[4][2]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]); + dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]); + dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]); + dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]); + + __syncthreads(); + + int k = offset_k; + int lk = 0; + + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]); + dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]); + dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]); + dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]); + } + + for (int p = 0; p < 4; p++) + { + half2 dq[4][4]; + const int4* b_ptr4 = (int4*) b_ptr; + int4 load_int4 = *b_ptr4; + + dequant_4bit_8_gptq(load_int4.x, dq[0], z1z16[0], y1y16[0], size_n, false); + dequant_4bit_8_gptq(load_int4.y, dq[1], z1z16[1], y1y16[1], size_n, false); + dequant_4bit_8_gptq(load_int4.z, dq[2], z1z16[2], y1y16[2], size_n, false); + dequant_4bit_8_gptq(load_int4.w, dq[3], z1z16[3], y1y16[3], size_n, false); + + b_ptr += size_n; + //half* dqh = (half*)dq; + if (b_q_perm) + { + for (int j = 0; j < 4; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(perm[lk++], n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(perm[lk++], n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + else + { + for (int j = 0; j < 4; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(offset_k + lk++, n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(offset_k + lk++, n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + } + k += 32; + } +} + +__global__ void reconstruct_exllama_3bit_kernel +( + const uint32_t* __restrict__ b_q_weight, + const int* __restrict__ b_q_perm, + const uint32_t* __restrict__ b_gptq_qzeros, + const half* __restrict__ b_gptq_scales, + const int size_k, + const int size_n, + const int groups, + half* __restrict__ b +) +{ + MatrixView_half_rw b_(b, size_k, size_n); + MatrixView_q3_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); + + int offset_k = BLOCK_KN_SIZE * blockIdx.y; + int offset_n = BLOCK_KN_SIZE * blockIdx.x 
* 4; + + int end_k = min(offset_k + BLOCK_KN_SIZE, size_k); + + // Preload remapping table + __shared__ int perm[BLOCK_KN_SIZE]; + int t = threadIdx.x; + + if (b_q_perm) + { + if (offset_k + t < size_k) + perm[t] = b_q_perm[offset_k + t]; + } + + // Column + int n = offset_n + t * 4; + if (n >= size_n) return; + + // Find initial group + int groupsize = size_k / groups; + int group = offset_k / groupsize; + int nextgroup = offset_k + groupsize; + + // b offset + int qk = offset_k / 32* 3; + + const uint32_t* b_ptr = b_q_weight + qk * size_n + n; + + // Initial zeros/scale + int zeros[4]; + half2 scales[4]; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + + __syncthreads(); + + int k = offset_k; + int lk = 0; + + while (k < end_k) + { + if (k == nextgroup) + { + group++; + nextgroup += groupsize; + b_gptq_qzeros_.item4(zeros, group, n); + b_gptq_scales_.item4_h2(scales, group, n); + } + + for (int p = 0; p < 1; p++) + { + int4 load_int4[3]; + load_int4[0] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[1] = *((int4*) b_ptr); b_ptr += size_n; + load_int4[2] = *((int4*) b_ptr); b_ptr += size_n; + + half2 dq[4][16]; + dequant_3bit_32(load_int4[0].x, load_int4[1].x, load_int4[2].x, dq[0], size_n, zeros[0] + 1); + dequant_3bit_32(load_int4[0].y, load_int4[1].y, load_int4[2].y, dq[1], size_n, zeros[1] + 1); + dequant_3bit_32(load_int4[0].z, load_int4[1].z, load_int4[2].z, dq[2], size_n, zeros[2] + 1); + dequant_3bit_32(load_int4[0].w, load_int4[1].w, load_int4[2].w, dq[3], size_n, zeros[3] + 1); + + if (b_q_perm) + { + for (int j = 0; j < 16; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(perm[lk++], n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(perm[lk++], n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + else + { + for (int j = 0; j < 16; j++) + { + for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); + b_.set4(offset_k + lk++, n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); + b_.set4(offset_k + lk++, n, __high2half(dq[0][j]), __high2half(dq[1][j]), __high2half(dq[2][j]), __high2half(dq[3][j])); + } + } + } + k += 32; + } +} -__global__ void reconstruct_exllama_kernel +__global__ void reconstruct_exllama_2bit_kernel ( const uint32_t* __restrict__ b_q_weight, const int* __restrict__ b_q_perm, @@ -317,7 +1136,7 @@ __global__ void reconstruct_exllama_kernel ) { MatrixView_half_rw b_(b, size_k, size_n); - MatrixView_q4_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); + MatrixView_q2_row b_gptq_qzeros_(b_gptq_qzeros, groups, size_n); MatrixView_half b_gptq_scales_(b_gptq_scales, groups, size_n); int offset_k = BLOCK_KN_SIZE * blockIdx.y; @@ -345,21 +1164,15 @@ __global__ void reconstruct_exllama_kernel int nextgroup = offset_k + groupsize; // b offset - int qk = offset_k / (32 / 4); + int qk = offset_k / (32 / 2); const uint32_t* b_ptr = b_q_weight + qk * size_n + n; // Initial zeros/scale int zeros[4]; half2 scales[4]; - half2 z1z16[4][2]; - half2 y1y16[4][2]; b_gptq_qzeros_.item4(zeros, group, n); b_gptq_scales_.item4_h2(scales, group, n); - dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]); - dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]); - dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]); - dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]); __syncthreads(); @@ -374,28 +1187,24 @@ __global__ void 
reconstruct_exllama_kernel nextgroup += groupsize; b_gptq_qzeros_.item4(zeros, group, n); b_gptq_scales_.item4_h2(scales, group, n); - dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]); - dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]); - dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]); - dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]); } - for (int p = 0; p < 4; p++) + for (int p = 0; p < 2; p++) { - half2 dq[4][4]; const int4* b_ptr4 = (int4*) b_ptr; int4 load_int4 = *b_ptr4; - dequant_4bit_8_gptq(load_int4.x, dq[0], z1z16[0], y1y16[0], size_n, false); - dequant_4bit_8_gptq(load_int4.y, dq[1], z1z16[1], y1y16[1], size_n, false); - dequant_4bit_8_gptq(load_int4.z, dq[2], z1z16[2], y1y16[2], size_n, false); - dequant_4bit_8_gptq(load_int4.w, dq[3], z1z16[3], y1y16[3], size_n, false); + half2 dq[4][8]; + dequant_2bit_16(load_int4.x, dq[0], size_n, zeros[0] + 1); + dequant_2bit_16(load_int4.y, dq[1], size_n, zeros[1] + 1); + dequant_2bit_16(load_int4.z, dq[2], size_n, zeros[2] + 1); + dequant_2bit_16(load_int4.w, dq[3], size_n, zeros[3] + 1); b_ptr += size_n; //half* dqh = (half*)dq; if (b_q_perm) { - for (int j = 0; j < 4; j++) + for (int j = 0; j < 8; j++) { for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); b_.set4(perm[lk++], n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); @@ -404,7 +1213,7 @@ __global__ void reconstruct_exllama_kernel } else { - for (int j = 0; j < 4; j++) + for (int j = 0; j < 8; j++) { for (int v = 0; v < 4; v++) dq[v][j] = __hmul2(scales[v], dq[v][j]); b_.set4(offset_k + lk++, n, __low2half(dq[0][j]), __low2half(dq[1][j]), __low2half(dq[2][j]), __low2half(dq[3][j])); @@ -416,7 +1225,6 @@ __global__ void reconstruct_exllama_kernel } } - void reconstruct_exllama ( const uint32_t* b_q_weight, @@ -426,7 +1234,8 @@ void reconstruct_exllama half* out, int height, int width, - int groups + int groups, + int bit ) { dim3 blockDim, gridDim; @@ -435,6 +1244,15 @@ void reconstruct_exllama gridDim.y = DIVIDE(height, BLOCK_KN_SIZE); gridDim.x = DIVIDE(width, BLOCK_KN_SIZE); + auto reconstruct_exllama_kernel = reconstruct_exllama_4bit_kernel; + if (bit == 2) { + reconstruct_exllama_kernel = reconstruct_exllama_2bit_kernel; + } else if (bit == 3) { + reconstruct_exllama_kernel = reconstruct_exllama_3bit_kernel; + } else if (bit == 8) { + reconstruct_exllama_kernel = reconstruct_exllama_8bit_kernel; + } + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); reconstruct_exllama_kernel<<>> ( @@ -450,7 +1268,7 @@ void reconstruct_exllama } -__global__ void gemm_half_q_half_alt_kernel( +__global__ void gemm_half_q_half_alt_4bit_kernel( const half2* __restrict__ vec, const uint32_t* __restrict__ mat, half* __restrict__ mul, @@ -548,6 +1366,95 @@ __global__ void gemm_half_q_half_alt_kernel( } +__global__ void gemm_half_q_half_alt_8bit_kernel( + const half2* __restrict__ vec, + const uint32_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const uint32_t* __restrict__ zeros, + const int* __restrict__ g_idx, + int batch, + int height, + int width +) +{ + int zero_width = width / 4; + int vec_height = height * 2; + const int blockwidth2 = BLOCK_KN_SIZE / 2; + int b = blockIdx.y * BLOCK_M_SIZE_MAX; + int b_end = min(BLOCK_M_SIZE_MAX, batch - b); + int h = BLOCK_KN_SIZE * blockIdx.z / 4; + int h_end = min(BLOCK_KN_SIZE / 4, height - h) * 2; + int w = BLOCK_KN_SIZE * blockIdx.x + threadIdx.x; + + __shared__ half2 
blockvec[BLOCK_M_SIZE_MAX][blockwidth2]; + if (threadIdx.x < h_end) { + for (int m = 0; m < b_end; ++m) { + blockvec[m][threadIdx.x] = + vec[(m + b) * vec_height + blockIdx.z * BLOCK_KN_SIZE / 2 + + threadIdx.x]; + } + } + + + if (blockIdx.z == 0) + { + for (int m = 0; m < b_end; m++) + mul[(b + m) * width + w] = __int2half_rn(0); + } + __syncthreads(); + + int i = width * h + w; + int g_h = h * 4; + int k = 0; + int z_w = w / 4; + int z_mod = (w % 4) * 8; + half2 res2; + half res[BLOCK_M_SIZE_MAX] = {}; + + unsigned int tmp; + while (k < h_end) { + tmp = mat[i]; + half2 scales_tmp[2]; + half2 zeros_tmp[2]; + for (int tmp_k = 0; tmp_k < 2; tmp_k++) { + int g = g_idx[g_h + (k + tmp_k) * 2]; + int g2 = g_idx[g_h + (k + tmp_k) * 2 + 1]; + half scale_f = scales[g * width + w]; + half scale_f2 = scales[g2 * width + w]; + half2 scale = __halves2half2(scale_f, scale_f2); + half2 zero = __halves2half2( + __hmul(scale_f, __int2half_rn(-((zeros[g * zero_width + z_w] >> z_mod) & 0xff) - 1)), + __hmul(scale_f2, __int2half_rn(-((zeros[g2 * zero_width + z_w] >> z_mod) & 0xff) - 1)) + ); + scales_tmp[tmp_k] = scale; + zeros_tmp[tmp_k] = zero; + } + for (int m = 0; m < b_end; m++) { +#ifndef USE_ROCM + res2 = {}; +#else + res2.x = __half_as_ushort(__float2half(0)); + res2.y = __half_as_ushort(__float2half(0)); +#endif + half2 v12 = __halves2half2(__int2half_rn(tmp & 0xFF), __int2half_rn((tmp >> 8) & 0xFF)); + res2 = __hfma2(__hfma2(v12, scales_tmp[0], zeros_tmp[0]), blockvec[m][k + 0], res2); + half2 v34 = __halves2half2(__int2half_rn((tmp >> 16) & 0xFF), __int2half_rn((tmp >> 24) & 0xFF)); + res2 = __hfma2(__hfma2(v34, scales_tmp[1], zeros_tmp[1]), blockvec[m][k + 1], res2); +#ifndef USE_ROCM + res[m] = __hadd(res[m], __hadd(res2.x, res2.y)); +#else + res[m] = __hadd(res[m], __hadd(__ushort_as_half(res2.x), __ushort_as_half(res2.y))); +#endif + } + i += width; + k += 2; + } + for (int m = 0; m < b_end; m++) { + atomicAdd(&mul[(b + m) * width + w], res[m]); + } +} + void gemm_half_q_half_alt ( const half* a, @@ -558,7 +1465,8 @@ void gemm_half_q_half_alt half* c, int size_m, int size_n, - int size_k + int size_k, + int bit ) { dim3 blockDim, gridDim; @@ -569,8 +1477,13 @@ void gemm_half_q_half_alt gridDim.y = DIVIDE(size_m, BLOCK_M_SIZE_MAX); gridDim.z = DIVIDE(size_k, BLOCK_KN_SIZE); + auto kernel = gemm_half_q_half_alt_4bit_kernel; + if (bit == 8) { + kernel = gemm_half_q_half_alt_8bit_kernel; + } + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - gemm_half_q_half_alt_kernel<<>> + kernel<<>> ( (const half2*) a, b_q_weight, @@ -579,12 +1492,12 @@ void gemm_half_q_half_alt b_gptq_qzeros, b_g_idx, size_m, - size_k / 8, + size_k / 32 * bit, size_n ); } - +template __global__ void reconstruct_gptq_kernel ( const uint32_t* __restrict__ w, @@ -600,30 +1513,79 @@ __global__ void reconstruct_gptq_kernel // Start of block int column = BLOCK_KN_SIZE * blockIdx.x + threadIdx.x; - int row = blockIdx.y * 8; + int row = blockIdx.y * 32 / bit; if (column >= width) return; // Views - MatrixView_q4_column w_(w, height, width); MatrixView_half_rw out_(out, height, width); MatrixView_half w_scales_(w_scales, group, width); - MatrixView_q4_row w_zeros_(w_zeros, group, width); + T w_zeros_(w_zeros, group, width); - uint32_t w_read = w_.item_uint32_t(row, column); + uint32_t w_read = w[blockIdx.y * width + column]; half* out_ptr = out_.item_ptr(row, column); #pragma unroll - for (int s = 0; s < 32; s += 4) + for (int s = 0; s < 32; s += bit) { - int group = g_idx[row + s / 4]; + int group = g_idx[row + s / 
bit]; half w_scale = w_scales_.item(group, column); uint32_t w_zero = w_zeros_.item(group, column) + 1; - half w_item = __hmul(__int2half_rn((int)((w_read >> s) & 0x0f) - w_zero), w_scale); + half w_item = __hmul(__int2half_rn((int)((w_read >> s) & ((1 << bit) - 1)) - w_zero), w_scale); *out_ptr = w_item; out_ptr += out_.width; } } +__global__ void reconstruct_gptq_3bit_kernel +( + const uint32_t* __restrict__ w, + const half* __restrict__ w_scales, + const uint32_t* __restrict__ w_zeros, + const int* __restrict__ g_idx, + const int height, + const int width, + const int group, + half* __restrict__ out +) +{ + // Start of block + int column = BLOCK_KN_SIZE * blockIdx.x + threadIdx.x; + int row = blockIdx.y * 32; + if (column >= width) return; + + // Views + + MatrixView_half_rw out_(out, height, width); + MatrixView_half w_scales_(w_scales, group, width); + MatrixView_q3_row w_zeros_(w_zeros, group, width); + + uint32_t w1 = w[(blockIdx.y * 3) * width + column]; + uint32_t w2 = w[(blockIdx.y * 3 + 1) * width + column]; + uint32_t w3 = w[(blockIdx.y * 3 + 2) * width + column]; + half* out_ptr = out_.item_ptr(row, column); + + #pragma unroll + for (int i = 0; i < 32; i += 1) + { + int group = g_idx[row + i]; + half w_scale = w_scales_.item(group, column); + uint32_t w_zero = w_zeros_.item(group, column) + 1; + int w_item; + if (i == 10) { + w_item = (w1 >> 30) | ((w2 << 2) & 0x4); + } else if (i == 21) { + w_item = (w2 >> 31) | ((w3 << 1) & 0x6); + } else if (i < 10) { + w_item = ((w1 >> (i * 3)) & 0x7); + } else if (i < 21) { + w_item = ((w2 >> (i * 3 - 32)) & 0x7); + } else { + w_item = ((w3 >> (i * 3 - 64)) & 0x7); + } + *out_ptr = __hmul(__int2half_rn(w_item - w_zero), w_scale); + out_ptr += out_.width; + } +} void reconstruct_gptq ( @@ -634,16 +1596,28 @@ void reconstruct_gptq half* out, int height, int width, - int groups + int groups, + int bit ) { dim3 blockDim, gridDim; blockDim.x = BLOCK_KN_SIZE; blockDim.y = 1; - gridDim.y = DIVIDE(height, 8); + gridDim.y = DIVIDE(height, 32 / bit); gridDim.x = DIVIDE(width, BLOCK_KN_SIZE); + + auto kernel = reconstruct_gptq_kernel; + if (bit == 2) { + kernel = reconstruct_gptq_kernel; + } else if (bit == 8) { + kernel = reconstruct_gptq_kernel; + } else if (bit == 3) { + kernel = reconstruct_gptq_3bit_kernel; + gridDim.y = DIVIDE(height, 32); + } + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - reconstruct_gptq_kernel<<>> + kernel<<>> ( b_q_weight, b_gptq_scales, @@ -671,19 +1645,27 @@ void gemm_half_q_half_cuda int size_n, int size_k, int groups, - bool use_exllama + bool use_exllama, + int bit ) { - if ((use_exllama && size_m > MAX_Q_GEMM_ROWS) || (!use_exllama && size_m > MAX_ALT_GEMM_ROWS)) { + bool use_reconstruct; + if (use_exllama) { + use_reconstruct = ((bit == 8 && size_m > MAX_Q_GEMM_ROWS_8BIT) || (bit != 8 && size_m > MAX_Q_GEMM_ROWS)); + } else { + // The 2/3-bit kernels are somehow slower than dequant + gemm baseline, so we disabled them for now. 
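+      // (bit < 4) keeps 2/3-bit weights on the reconstruct-to-FP16 + cuBLAS path below;
+      // 4/8-bit only falls back to it once size_m exceeds MAX_ALT_GEMM_ROWS.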
+ use_reconstruct = (bit < 4 || size_m > MAX_ALT_GEMM_ROWS); + } + if (use_reconstruct) { // Reconstruct FP16 matrix, then cuBLAS if (use_exllama) { reconstruct_exllama(b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, temp_dq, - size_k, size_n, groups); + size_k, size_n, groups, bit); } else { reconstruct_gptq(b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, - temp_dq, size_k, size_n, groups); + temp_dq, size_k, size_n, groups, bit); } const half alpha = __float2half(1.0f); @@ -707,7 +1689,7 @@ void gemm_half_q_half_cuda { gemm_half_q_half_cuda_part(a, b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, c, last_chunk, size_n, size_k, BLOCK_M_SIZE_MAX, - groups); + groups, bit); } if (last_chunk_size) @@ -715,18 +1697,17 @@ void gemm_half_q_half_cuda gemm_half_q_half_cuda_part(a + last_chunk * size_k, b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, c + last_chunk * size_n, last_chunk_size, size_n, size_k, last_chunk_size, - groups); + groups, bit); } } else { gemm_half_q_half_alt(a, b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, - c, size_m, size_n, size_k); + c, size_m, size_n, size_k, bit); } } - -__global__ void shuffle_kernel +__global__ void shuffle_4bit_kernel ( uint32_t* __restrict__ b_q_weight, const int size_k, @@ -740,13 +1721,53 @@ __global__ void shuffle_kernel while (k < size_k) { shuffle_4bit_8 (b_ptr, size_n); b_ptr += 1 * size_n; k += 8; } } +__global__ void shuffle_8bit_kernel +( + uint32_t* __restrict__ b_q_weight, + const int size_k, + const int size_n +) +{ + int n = blockIdx.x * THREADS_X + threadIdx.x; + if (n >= size_n) return; + int k = 0; + uint32_t* b_ptr = b_q_weight + n; + while (k < size_k) { shuffle_8bit_4 (b_ptr, size_n); b_ptr += 1 * size_n; k += 4; } +} + +__global__ void shuffle_2bit_kernel +( + uint32_t* __restrict__ b_q_weight, + const int size_k, + const int size_n +) +{ + int n = blockIdx.x * THREADS_X + threadIdx.x; + if (n >= size_n) return; + int k = 0; + uint32_t* b_ptr = b_q_weight + n; + while (k < size_k) { shuffle_2bit_16(b_ptr, size_n); b_ptr += 1 * size_n; k += 16; } +} + +__global__ void shuffle_3bit_kernel +( + uint32_t* __restrict__ b_q_weight, + const int size_k, + const int size_n +) +{ + int n = blockIdx.x * THREADS_X + threadIdx.x; + if (n >= size_n) return; + int k = 0; + uint32_t* b_ptr = b_q_weight + n; + while (k < size_k) { shuffle_3bit_32(b_ptr, size_n); b_ptr += 3 * size_n; k += 32; } +} -__global__ void make_sequential_kernel +__global__ void make_sequential_4bit_kernel ( const uint32_t* __restrict__ w, uint32_t* __restrict__ w_new, const int* __restrict__ q_perm, - const int w_height, const int w_width ) { @@ -778,37 +1799,204 @@ __global__ void make_sequential_kernel w_new2[w_new2_row * w2_stride + w2_column] = dst; } +__global__ void make_sequential_2bit_kernel +( + const uint32_t* __restrict__ w, + uint32_t* __restrict__ w_new, + const int* __restrict__ q_perm, + const int w_width +) +{ + const uint64_t* w2 = (uint64_t*) w; + uint64_t* w_new2 = (uint64_t*) w_new; + int w2_stride = w_width >> 1; + int w2_column = THREADS_X * blockIdx.x + threadIdx.x; + if (w2_column >= w2_stride) return; + int w_new2_row = blockIdx.y; + int q_perm_idx = w_new2_row << 4; + uint64_t dst = 0; + + #pragma unroll + for (int i = 0; i < 16; i++) + { + int source_row = q_perm[q_perm_idx++]; + + int w2_row = source_row >> 4; + int w2_subrow = source_row & 0x0f; + int w2_row_shift = w2_subrow << 1; + int wnew2_row_shift = i << 1; + + uint64_t src = w2[w2_row * w2_stride + w2_column]; + src >>= w2_row_shift; + src &= 0x0000000300000003; + src 
<<= wnew2_row_shift; + dst |= src; + } + w_new2[w_new2_row * w2_stride + w2_column] = dst; +} + +__global__ void make_sequential_3bit_kernel +( + const uint32_t* __restrict__ w, + uint32_t* __restrict__ w_new, + const int* __restrict__ q_perm, + const int w_width +) +{ + int w_column = THREADS_X * blockIdx.x + threadIdx.x; + if (w_column >= w_width) return; + int w_new_row = blockIdx.y * 3; + int q_perm_idx = blockIdx.y << 5; + uint32_t dst[3] = {0, 0, 0}; + + #pragma unroll + for (int i = 0; i < 32; i++) + { + int source_row = q_perm[q_perm_idx++]; + int z_w = (source_row / 32) * 3; + int z_mod = source_row % 32; + int z_bit; + + if (z_mod != 10){ + if (z_mod != 21){ + z_bit = z_mod; + if (z_bit > 21){ + z_bit *= 3; + z_bit -= 64; + z_w += 2; + } else if (z_bit > 10){ + z_bit *= 3; + z_bit -= 32; + z_w += 1; + } else { + z_bit *= 3; + } + } else { + z_w += 1; + } + } + + uint64_t src; + if (z_mod == 10) { + src = (w[z_w * w_width + w_column] >> 30) | ((w[(z_w + 1) * w_width + w_column] << 2) & 0x4); + } else if (z_mod == 21){ + src = (w[z_w * w_width + w_column] >> 31) | ((w[(z_w + 1) * w_width + w_column] << 1) & 0x6); + } else { + src = w[z_w * w_width + w_column]; + src >>= z_bit; + src &= 0x07; + } + + z_w = 0; + if (i != 10){ + if (i != 21){ + z_bit = i; + if (z_bit > 21){ + z_bit *= 3; + z_bit -= 64; + z_w += 2; + } else if (z_bit > 10){ + z_bit *= 3; + z_bit -= 32; + z_w += 1; + } else { + z_bit *= 3; + } + } else { + z_w += 1; + } + } + if (i == 10) { + dst[z_w] |= (src & 0x03) << 30; + dst[z_w + 1] |= ((src & 0x4) >> 2); + } else if (i == 21) { + dst[z_w] |= (src & 0x01) << 31; + dst[z_w + 1] |= ((src & 0x6) >> 1); + } else { + dst[z_w] |= (src << z_bit); + } + } + w_new[w_new_row * w_width + w_column] = dst[0]; + w_new[(w_new_row + 1) * w_width + w_column] = dst[1]; + w_new[(w_new_row + 2) * w_width + w_column] = dst[2]; +} + +__global__ void make_sequential_8bit_kernel +( + const uint32_t* __restrict__ w, + uint32_t* __restrict__ w_new, + const int* __restrict__ q_perm, + const int w_width +) +{ + const uint64_t* w2 = (uint64_t*) w; + uint64_t* w_new2 = (uint64_t*) w_new; + int w2_stride = w_width >> 1; + int w2_column = THREADS_X * blockIdx.x + threadIdx.x; + if (w2_column >= w2_stride) return; + int w_new2_row = blockIdx.y; + int q_perm_idx = w_new2_row << 2; + uint64_t dst = 0; + + #pragma unroll + for (int i = 0; i < 4; i++) + { + int source_row = q_perm[q_perm_idx++]; + + int w2_row = source_row >> 2; + int w2_subrow = source_row & 0x03; + int w2_row_shift = w2_subrow << 3; + int wnew2_row_shift = i << 3; + + uint64_t src = w2[w2_row * w2_stride + w2_column]; + src >>= w2_row_shift; + src &= 0x000000ff000000ff; + src <<= wnew2_row_shift; + dst |= src; + } + w_new2[w_new2_row * w2_stride + w2_column] = dst; +} + void shuffle_exllama_weight ( uint32_t* q_weight, int* q_perm, int height, - int width + int width, + int bit ) { if (q_perm) { uint32_t* new_qweight = NULL; - cudaMalloc(&new_qweight, height / 8 * width * sizeof(uint32_t)); + cudaMalloc(&new_qweight, height / 32 * bit * width * sizeof(uint32_t)); dim3 blockDim, gridDim; blockDim.x = THREADS_X; blockDim.y = 1; gridDim.x = DIVIDE(width, THREADS_X); - gridDim.y = height / 8; - + gridDim.y = height / 32 * bit; + + auto kernel = make_sequential_4bit_kernel; + if (bit == 2) { + kernel = make_sequential_2bit_kernel; + } else if (bit == 3) { + kernel = make_sequential_3bit_kernel; + gridDim.y = height / 32; + } else if (bit == 8) { + kernel = make_sequential_8bit_kernel; + } const cudaStream_t stream = 
at::cuda::getCurrentCUDAStream(); - make_sequential_kernel<<>> + kernel<<>> ( q_weight, new_qweight, q_perm, - height / 8, width ); // Replace qweights - cudaMemcpyAsync(q_weight, new_qweight, height / 8 * width * sizeof(uint32_t), cudaMemcpyDeviceToDevice); + cudaMemcpyAsync(q_weight, new_qweight, height / 32 * bit * width * sizeof(uint32_t), cudaMemcpyDeviceToDevice); // Cleanup cudaDeviceSynchronize(); cudaFree(new_qweight); @@ -818,6 +2006,14 @@ void shuffle_exllama_weight blockDim.y = 1; gridDim.x = DIVIDE(width, THREADS_X); gridDim.y = 1; + auto shuffle_kernel = shuffle_4bit_kernel; + if (bit == 2) { + shuffle_kernel = shuffle_2bit_kernel; + } else if (bit == 3) { + shuffle_kernel = shuffle_3bit_kernel; + } else if (bit == 8) { + shuffle_kernel = shuffle_8bit_kernel; + } const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); shuffle_kernel<<>>(q_weight, height, width); } @@ -832,13 +2028,14 @@ torch::Tensor gptq_gemm torch::Tensor b_gptq_qzeros, torch::Tensor b_gptq_scales, torch::Tensor b_g_idx, - bool use_exllama + bool use_exllama, + int bit ) { const at::cuda::OptionalCUDAGuard device_guard(device_of(a)); auto options = torch::TensorOptions().dtype(a.dtype()).device(a.device()); at::Tensor c = torch::empty({a.size(0), b_q_weight.size(1)}, options); - at::Tensor temp_dq = torch::empty({b_q_weight.size(0) * 8, b_q_weight.size(1)}, options); + at::Tensor temp_dq = torch::empty({b_q_weight.size(0) * 32 / bit, b_q_weight.size(1)}, options); vllm::gptq::gemm_half_q_half_cuda ( @@ -854,7 +2051,8 @@ torch::Tensor gptq_gemm c.size(1), // n a.size(1), // k b_gptq_qzeros.size(0), // group number - use_exllama + use_exllama, + bit ); return c; } @@ -862,14 +2060,16 @@ torch::Tensor gptq_gemm void gptq_shuffle ( torch::Tensor q_weight, - torch::Tensor q_perm + torch::Tensor q_perm, + int bit ) { const at::cuda::OptionalCUDAGuard device_guard(device_of(q_weight)); vllm::gptq::shuffle_exllama_weight( (uint32_t*) q_weight.data_ptr(), q_perm.device().is_meta() ? 
NULL : (int*) q_perm.data_ptr(), - q_weight.size(0) * 8, - q_weight.size(1) + q_weight.size(0) * 32 / bit, + q_weight.size(1), + bit ); } diff --git a/csrc/quantization/gptq/qdq_2.cuh b/csrc/quantization/gptq/qdq_2.cuh new file mode 100644 index 0000000000000..295872a91de37 --- /dev/null +++ b/csrc/quantization/gptq/qdq_2.cuh @@ -0,0 +1,87 @@ +/* +Copied from https://github.com/turboderp/exllamav2 +*/ + +#ifndef _qdq_2_cuh +#define _qdq_2_cuh + +#include "qdq_util.cuh" + +namespace vllm { +namespace gptq { + +// Permutation: +// +// ffddbb99 77553311 eeccaa88 66442200 + +__forceinline__ __device__ void shuffle_2bit_16 +( + uint32_t* q, + int stride +) +{ + uint32_t qa = q[0]; + uint32_t qb = 0; + + #pragma unroll + for (int i = 0; i < 8; i++) + { + uint32_t qa0 = qa & 0x03; + uint32_t qa1 = (qa & 0x0c) >> 2; + qa >>= 4; + qb |= (qa1 << (i * 2 + 16)); + qb |= (qa0 << (i * 2)); + } + q[0] = qb; +} + +__forceinline__ __device__ void dequant_2bit_16 +( + const uint32_t q_0, + half2 (&dq)[8], + int stride, + const uint32_t zero +) +{ + const uint32_t c0 = 0x64006400; + const half y4_ = __float2half_rn(1.0f / 4.0f); + const half y16_ = __float2half_rn(1.0f / 16.0f); + const half y64_ = __float2half_rn(1.0f / 64.0f); + const half2 y4 = __halves2half2(y4_, y4_); + const half2 y16 = __halves2half2(y16_, y16_); + const half2 y64 = __halves2half2(y64_, y64_); + + const half_uint16 z1_(0xe400 | zero); // half(-1024.0f - zero); + const half z4_ = __hsub(__int2half_rn(-256), __int2half_rn(zero)); + const half z16_ = __hsub(__int2half_rn(-64), __int2half_rn(zero)); + const half z64_ = __hsub(__int2half_rn(-16), __int2half_rn(zero)); + const half2 z1 = __half2half2(z1_.as_half); + const half2 z4 = __half2half2(z4_); + const half2 z16 = __half2half2(z16_); + const half2 z64 = __half2half2(z64_); + + uint32_t qa = q_0; + half2_uint32 q0((qa & 0x00030003) | c0); // half2(q[ 0], q[ 1]) + 1024 + half2_uint32 q1((qa & 0x000c000c) | c0); // half2(q[ 2], q[ 3]) * 4 + 1024 + half2_uint32 q2((qa & 0x00300030) | c0); // half2(q[ 4], q[ 5]) * 16 + 1024 + half2_uint32 q3((qa & 0x00c000c0) | c0); // half2(q[ 6], q[ 7]) * 64 + 1024 + qa >>= 8; + half2_uint32 q4((qa & 0x00030003) | c0); // half2(q[ 8], q[ 8]) + 1024 + half2_uint32 q5((qa & 0x000c000c) | c0); // half2(q[10], q[11]) * 4 + 1024 + half2_uint32 q6((qa & 0x00300030) | c0); // half2(q[12], q[13]) * 16 + 1024 + half2_uint32 q7((qa & 0x00c000c0) | c0); // half2(q[14], q[15]) * 64 + 1024 + + dq[0] = __hadd2(q0.as_half2, z1); + dq[1] = __hfma2(q1.as_half2, y4, z4); + dq[2] = __hfma2(q2.as_half2, y16, z16); + dq[3] = __hfma2(q3.as_half2, y64, z64); + dq[4] = __hadd2(q4.as_half2, z1); + dq[5] = __hfma2(q5.as_half2, y4, z4); + dq[6] = __hfma2(q6.as_half2, y16, z16); + dq[7] = __hfma2(q7.as_half2, y64, z64); +} + +} // namespace gptq +} // namespace vllm + +#endif diff --git a/csrc/quantization/gptq/qdq_3.cuh b/csrc/quantization/gptq/qdq_3.cuh new file mode 100644 index 0000000000000..3e7ecde752ba3 --- /dev/null +++ b/csrc/quantization/gptq/qdq_3.cuh @@ -0,0 +1,141 @@ +#ifndef _qdq_3_cuh +#define _qdq_3_cuh + +#include "qdq_util.cuh" + +namespace vllm { +namespace gptq { +// Permutation: +// +// v9997775 55333111 u8886664 44222000 (u, v lsb) +// vjjjhhhf ffdddbbb uiiiggge eecccaaa +// vtttrrrp ppnnnlll usssqqqo oommmkkk + +__forceinline__ __device__ void shuffle_3bit_32 +( + uint32_t* q, + int stride +) +{ + uint32_t qa = q[0 * stride]; + uint32_t qb = q[1 * stride]; + uint32_t qc = q[2 * stride]; + + // qa: aa999888 77766655 54443332 22111000 + // qb: lkkkjjji 
iihhhggg fffeeedd dcccbbba + // qc: vvvuuutt tsssrrrq qqpppooo nnnmmmll + + uint32_t qd = qc >> 26; + qc <<= 4; + qc |= qb >> 28; + qb <<= 2; + qb |= qa >> 30; + + // qa: ..999888 77766655 54443332 22111000 + // qb: ..jjjiii hhhgggff feeedddc ccbbbaaa + // qc: ..tttsss rrrqqqpp pooonnnm mmlllkkk + // qd: vvvuuu + + uint32_t za = 0; + uint32_t zb = 0; + uint32_t zc = 0; + + for (int i = 0; i < 5; i++) { uint32_t t0 = qa & 0x07; uint32_t t1 = (qa & 0x38) >> 3; qa >>= 6; za |= (t0 << (i * 3)); za |= (t1 << (i * 3 + 16)); } + for (int i = 0; i < 5; i++) { uint32_t t0 = qb & 0x07; uint32_t t1 = (qb & 0x38) >> 3; qb >>= 6; zb |= (t0 << (i * 3)); zb |= (t1 << (i * 3 + 16)); } + for (int i = 0; i < 5; i++) { uint32_t t0 = qc & 0x07; uint32_t t1 = (qc & 0x38) >> 3; qc >>= 6; zc |= (t0 << (i * 3)); zc |= (t1 << (i * 3 + 16)); } + + // za: 9997775 55333111 8886664 44222000 + // zb: jjjhhhf ffdddbbb iiiggge eecccaaa + // zc: tttrrrp ppnnnlll sssqqqo oommmkkk + // qd: vvvuuu + + za |= ((qd & 0x01) >> 0) << 15; + zb |= ((qd & 0x02) >> 1) << 15; + zc |= ((qd & 0x04) >> 2) << 15; + za |= ((qd & 0x08) >> 3) << 31; + zb |= ((qd & 0x10) >> 4) << 31; + zc |= ((qd & 0x20) >> 5) << 31; + + // za: v9997775 55333111 u8886664 44222000 (u, v lsb) + // zb: vjjjhhhf ffdddbbb uiiiggge eecccaaa + // zc: vtttrrrp ppnnnlll usssqqqo oommmkkk + + q[0 * stride] = za; + q[1 * stride] = zb; + q[2 * stride] = zc; +} + +__forceinline__ __device__ void dequant_3bit_32 +( + const uint32_t q_0, + const uint32_t q_1, + const uint32_t q_2, + half2 (&dq)[16], + int stride, + const uint32_t zero +) +{ + const uint32_t c0 = 0x64006400; + const half y8_ = __float2half_rn(1.0f / 8.0f); + const half y64_ = __float2half_rn(1.0f / 64.0f); + const half2 y8 = __halves2half2(y8_, y8_); + const half2 y64 = __halves2half2(y64_, y64_); + const half_uint16 z1_(0xe400 | zero); // half(-1024.0f - zero); + const half z8_ = __hsub(__int2half_rn(-128), __int2half_rn(zero)); + const half z64_ = __hsub(__int2half_rn(-16), __int2half_rn(zero)); + const half2 z1 = __halves2half2(z1_.as_half, z1_.as_half); + const half2 z8 = __halves2half2(z8_, z8_); + const half2 z64 = __halves2half2(z64_, z64_); + + uint32_t qa = q_0; + uint32_t qb = q_1; + uint32_t qc = q_2; + + half2_uint32 q0((qa & 0x00070007) | c0); // half2(q[ 0], q[ 1]) + 1024 + half2_uint32 q1((qa & 0x00380038) | c0); // half2(q[ 2], q[ 3]) * 8 + 1024 + qa >>= 6; + half2_uint32 q2((qa & 0x00070007) | c0); // half2(q[ 4], q[ 5]) + 1024 + half2_uint32 q3((qa & 0x00380038) | c0); // half2(q[ 6], q[ 7]) * 8 + 1024 + half2_uint32 q4((qa & 0x01c001c0) | c0); // half2(q[ 8], q[ 9]) * 64 + 1024 + qa >>= 9; + qa &= 0x00010001; + half2_uint32 q5((qb & 0x00070007) | c0); // half2(q[10], q[11]) + 1024 + half2_uint32 q6((qb & 0x00380038) | c0); // half2(q[12], q[13]) * 8 + 1024 + qb >>= 6; + half2_uint32 q7((qb & 0x00070007) | c0); // half2(q[14], q[15]) + 1024 + half2_uint32 q8((qb & 0x00380038) | c0); // half2(q[16], q[17]) * 8 + 1024 + half2_uint32 q9((qb & 0x01c001c0) | c0); // half2(q[18], q[19]) * 64 + 1024 + qb >>= 8; + qb &= 0x00020002; + half2_uint32 q10((qc & 0x00070007) | c0); // half2(q[20], q[21]) + 1024 + half2_uint32 q11((qc & 0x00380038) | c0); // half2(q[22], q[23]) * 8 + 1024 + qc >>= 6; + half2_uint32 q12((qc & 0x00070007) | c0); // half2(q[24], q[25]) + 1024 + half2_uint32 q13((qc & 0x00380038) | c0); // half2(q[26], q[27]) * 8 + 1024 + half2_uint32 q14((qc & 0x01c001c0) | c0); // half2(q[28], q[29]) * 64 + 1024 + qc >>= 7; + qc &= 0x00040004; + half2_uint32 q15((qa | qb | qc) | c0); + + dq[ 
0] = __hadd2( q0.as_half2, z1); + dq[ 1] = __hfma2( q1.as_half2, y8, z8); + dq[ 2] = __hadd2( q2.as_half2, z1); + dq[ 3] = __hfma2( q3.as_half2, y8, z8); + dq[ 4] = __hfma2( q4.as_half2, y64, z64); + dq[ 5] = __hadd2( q5.as_half2, z1); + dq[ 6] = __hfma2( q6.as_half2, y8, z8); + dq[ 7] = __hadd2( q7.as_half2, z1); + dq[ 8] = __hfma2( q8.as_half2, y8, z8); + dq[ 9] = __hfma2( q9.as_half2, y64, z64); + dq[10] = __hadd2(q10.as_half2, z1); + dq[11] = __hfma2(q11.as_half2, y8, z8); + dq[12] = __hadd2(q12.as_half2, z1); + dq[13] = __hfma2(q13.as_half2, y8, z8); + dq[14] = __hfma2(q14.as_half2, y64, z64); + dq[15] = __hadd2(q15.as_half2, z1); +} + +} // namespace gptq +} // namespace vllm + +#endif diff --git a/csrc/quantization/gptq/qdq_4.cuh b/csrc/quantization/gptq/qdq_4.cuh index cfc4635a22c1d..881f353f6564d 100644 --- a/csrc/quantization/gptq/qdq_4.cuh +++ b/csrc/quantization/gptq/qdq_4.cuh @@ -38,16 +38,17 @@ __forceinline__ __device__ void dequant_4bit_8 ( const uint32_t q_0, half2 (&dq)[4], - int stride + int stride, + const uint32_t zero ) { const uint32_t c0 = 0x64006400; const half y16_ = __float2half_rn(1.0f / 16.0f); const half2 y16 = __halves2half2(y16_, y16_); - const half z1_ = __float2half_rn(-1024.0f - 8.0f); - const half z16_ = __float2half_rn(-1024.0f / 16.0f - 8.0f); - const half2 z1 = __halves2half2(z1_, z1_); - const half2 z16 = __halves2half2(z16_, z16_); + const half_uint16 z1_(0xe400 | zero); // half(-1024.0f - zero); + const half z16_ = __hsub(__int2half_rn(-64), __int2half_rn(zero)); + const half2 z1 = __half2half2(z1_.as_half); + const half2 z16 = __half2half2(z16_); uint32_t qa = q_0; half2_uint32 q0((qa & 0x000f000f) | c0); // half2(q[ 0], q[ 1]) + 1024 @@ -143,93 +144,4 @@ __forceinline__ __device__ void dequant_4bit_8_gptq } // namespace gptq } // namespace vllm -#else - -namespace vllm { -namespace gptq { -__forceinline__ __device__ void shuffle_4bit_8 -( - uint32_t* q, - int stride -) -{ -} - -__forceinline__ __device__ void dequant_4bit_8 -( - const uint32_t q_0, - half2 (&dq)[4], - int stride -) -{ - half dqh[8]; - for (int i = 0; i < 8; i++) dqh[i] = dq_ns(exb(q_0, i * 4, 0x0f), 8); - - for (int i = 0; i < 4; i++) dq[i] = __halves2half2(dqh[i * 2], dqh[i * 2 + 1]); -} - -__forceinline__ __device__ void dequant_4bit_8_prep_zero_scale -( - const uint32_t zero, - const half scale, - half2 (&z1)[2], - half2 (&y1)[2] -) -{ - half z = __int2half_rn(-((int)zero)); - z = __hmul(z, scale); - z1[0] = __half2half2(z); - y1[0] = __half2half2(scale); -} - -__forceinline__ __device__ void dequant_4bit_8_prep_zero -( - const uint32_t zero, - half2(&z1)[2], - half2(&y1)[2] -) -{ - half z = __int2half_rn(-((int)zero)); - z1[0] = __half2half2(z); -} - -__forceinline__ __device__ void dequant_4bit_8_gptq -( - const uint32_t q_0, - half2 (&dq)[4], - half2 (&z1)[2], - half2 (&y1)[2], - int stride, - bool scaled -) -{ - half2 dqh2[8]; - - uint32_t qa = q_0; - for (int i = 0; i < 4; i++) - { - half d0 = __int2half_rn(qa & 0x0f); qa >>= 4; - half d1 = __int2half_rn(qa & 0x0f); qa >>= 4; - dqh2[i] = __halves2half2(d0, d1); - } - - if (scaled) - { - dq[0] = __hfma2(dqh2[0], y1[0], z1[0]); - dq[1] = __hfma2(dqh2[1], y1[0], z1[0]); - dq[2] = __hfma2(dqh2[2], y1[0], z1[0]); - dq[3] = __hfma2(dqh2[3], y1[0], z1[0]); - } - else - { - dq[0] = __hadd2(dqh2[0], z1[0]); - dq[1] = __hadd2(dqh2[1], z1[0]); - dq[2] = __hadd2(dqh2[2], z1[0]); - dq[3] = __hadd2(dqh2[3], z1[0]); - } -} - -} // namespace gptq -} // namespace vllm - #endif diff --git a/csrc/quantization/gptq/qdq_8.cuh 
b/csrc/quantization/gptq/qdq_8.cuh new file mode 100644 index 0000000000000..0c7ad7876140b --- /dev/null +++ b/csrc/quantization/gptq/qdq_8.cuh @@ -0,0 +1,40 @@ +/* +Copied from https://github.com/turboderp/exllamav2 +*/ + +#ifndef _qdq_8_cuh +#define _qdq_8_cuh + +#include "qdq_util.cuh" + +namespace vllm { +namespace gptq { + +__forceinline__ __device__ void shuffle_8bit_4 +( + uint32_t* q, + int stride +) +{ +} + +__forceinline__ __device__ void dequant_8bit_8 +( + const uint32_t q_0, + const uint32_t q_1, + half2 (&dq)[4], + int stride, + const uint32_t zero +) +{ + half dqh[8]; + for (int i = 0; i < 4; i++) dqh[i ] = dq_ns(exb(q_0, i * 8, 0xff), zero); + for (int i = 0; i < 4; i++) dqh[i + 4] = dq_ns(exb(q_1, i * 8, 0xff), zero); + + for (int i = 0; i < 4; i++) dq[i] = __halves2half2(dqh[i * 2], dqh[i * 2 + 1]); +} + +} // namespace gptq +} // namespace vllm + +#endif diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py index 7218760fbe55d..2e6aabb232673 100644 --- a/vllm/model_executor/layers/quantization/gptq.py +++ b/vllm/model_executor/layers/quantization/gptq.py @@ -1,6 +1,7 @@ import enum from enum import Enum from typing import Any, Dict, List, Optional +from fractions import Fraction import torch from torch.nn.parameter import Parameter @@ -27,11 +28,10 @@ def __init__( self.weight_bits = weight_bits self.group_size = group_size self.desc_act = desc_act - self.pack_factor = 32 // self.weight_bits - # exllama kernel v1 only supports 4 bit - if self.weight_bits != 4: + self.pack_factor = Fraction(32, self.weight_bits) + if self.weight_bits not in [2, 3, 4, 8]: raise ValueError( - "Currently, only 4-bit weight quantization is supported for " + "Currently, only 2/3/4/8-bit weight quantization is supported for " f"GPTQ, but got {self.weight_bits} bits.") def __repr__(self) -> str: @@ -101,7 +101,7 @@ def create_weights( "The input size is not aligned with the quantized " "weight shape. This can be caused by too large " "tensor parallel size.") - if output_size_per_partition % self.quant_config.pack_factor != 0: + if output_size_per_partition % self.quant_config.pack_factor.numerator != 0: raise ValueError( "The output size is not aligned with the quantized " "weight shape. 
This can be caused by too large " @@ -201,11 +201,13 @@ def apply_weights(self, else: weights["g_idx"] = torch.empty((1, 1), device="meta") weights["exllama_state"] = ExllamaState.READY - ops.gptq_shuffle(weights["qweight"], weights["g_idx"]) + ops.gptq_shuffle(weights["qweight"], weights["g_idx"], + self.quant_config.weight_bits) output = ops.gptq_gemm(reshaped_x, weights["qweight"], weights["qzeros"], weights["scales"], weights["g_idx"], - weights["exllama_state"] == ExllamaState.READY) + weights["exllama_state"] == ExllamaState.READY, + self.quant_config.weight_bits) if bias is not None: output = output + bias return output.reshape(out_shape) From a6d471c75939b2f4708a4e1cb1aa3b7b993ee54b Mon Sep 17 00:00:00 2001 From: Jae-Won Chung Date: Thu, 29 Feb 2024 01:04:07 -0500 Subject: [PATCH 030/196] Fix: `AttributeError` in OpenAI-compatible server (#3018) --- vllm/entrypoints/openai/protocol.py | 2 +- vllm/entrypoints/openai/serving_chat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index e85e7e2b1ede9..97cfd797587c4 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -57,7 +57,7 @@ class UsageInfo(BaseModel): class ChatCompletionRequest(BaseModel): model: str - messages: Union[str, List[Dict[str, str]]] + messages: List[Dict[str, str]] temperature: Optional[float] = 0.7 top_p: Optional[float] = 1.0 n: Optional[int] = 1 diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 5635ac6c9e106..e5ae39e110a40 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -80,7 +80,7 @@ def get_chat_request_role(self, request: ChatCompletionRequest) -> str: if request.add_generation_prompt: return self.response_role else: - return request.messages[-1].role + return request.messages[-1]["role"] async def chat_completion_stream_generator( self, request: ChatCompletionRequest, From 9289e577ec185bd9feb2c03bb86b82f1bf9bb633 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Thu, 29 Feb 2024 14:15:18 +0800 Subject: [PATCH 031/196] add cache_config's info to prometheus metrics. 
(#3100) --- vllm/config.py | 4 ++++ vllm/engine/llm_engine.py | 1 + vllm/engine/metrics.py | 10 +++++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index fc848b72d7f2a..2f8883fe0733e 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -308,6 +308,10 @@ def __init__( self.num_gpu_blocks = None self.num_cpu_blocks = None + def metrics_info(self): + # convert cache_config to dict(key: str, value:str) for prometheus metrics info + return {key: str(value) for key, value in self.__dict__.items()} + def _verify_args(self) -> None: if self.gpu_memory_utilization > 1.0: raise ValueError( diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f0fd7efdef813..6f5af71426d78 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -138,6 +138,7 @@ def __init__( self.stat_logger = StatLogger( local_interval=_LOCAL_LOGGING_INTERVAL_SEC, labels=dict(model_name=model_config.model)) + self.stat_logger.info("cache_config", self.cache_config) self.forward_dag = None if USE_RAY_COMPILED_DAG: diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 83e66a9372272..54b09c38f58a5 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -1,5 +1,5 @@ from vllm.logger import init_logger -from prometheus_client import Counter, Gauge, Histogram, REGISTRY, disable_created_metrics +from prometheus_client import Counter, Gauge, Histogram, Info, REGISTRY, disable_created_metrics import time import numpy as np @@ -23,6 +23,10 @@ def __init__(self, labelnames: List[str]): if hasattr(collector, "_name") and "vllm" in collector._name: REGISTRY.unregister(collector) + self.info_cache_config = Info( + name='vllm:cache_config', + documentation='information of cache_config') + # System stats self.gauge_scheduler_running = Gauge( name="vllm:num_requests_running", @@ -128,6 +132,10 @@ def __init__(self, local_interval: float, labels: Dict[str, str]) -> None: self.labels = labels self.metrics = Metrics(labelnames=list(labels.keys())) + def info(self, type: str, obj: object) -> None: + if type == "cache_config": + self.metrics.info_cache_config.info(obj.metrics_info()) + def _get_throughput(self, tracked_stats: List[int], now: float) -> float: return float(np.sum(tracked_stats) / (now - self.last_local_log)) From bfdcfa6a053c693800551bd1bd71acabbe1941e8 Mon Sep 17 00:00:00 2001 From: Seonghyeon Date: Thu, 29 Feb 2024 17:51:48 +0900 Subject: [PATCH 032/196] Support starcoder2 architecture (#3089) --- README.md | 1 + tests/models/test_models.py | 1 + vllm/model_executor/models/__init__.py | 1 + vllm/model_executor/models/starcoder2.py | 310 ++++++++++++++++++ vllm/transformers_utils/config.py | 10 + vllm/transformers_utils/configs/__init__.py | 2 + vllm/transformers_utils/configs/starcoder2.py | 127 +++++++ 7 files changed, 452 insertions(+) create mode 100644 vllm/model_executor/models/starcoder2.py create mode 100644 vllm/transformers_utils/configs/starcoder2.py diff --git a/README.md b/README.md index f771788db2b89..064faa550f267 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ vLLM seamlessly supports many Hugging Face models, including the following archi - Qwen (`Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc.) - Qwen2 (`Qwen/Qwen2-7B-beta`, `Qwen/Qwen-7B-Chat-beta`, etc.) - StableLM(`stabilityai/stablelm-3b-4e1t`, `stabilityai/stablelm-base-alpha-7b-v2`, etc.) +- Starcoder2(`bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc.) - Yi (`01-ai/Yi-6B`, `01-ai/Yi-34B`, etc.) 
Install vLLM with pip or [from source](https://vllm.readthedocs.io/en/latest/getting_started/installation.html#build-from-source): diff --git a/tests/models/test_models.py b/tests/models/test_models.py index e44452e9893cf..fb567e837d281 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -19,6 +19,7 @@ "microsoft/phi-2", "stabilityai/stablelm-3b-4e1t", "allenai/OLMo-1B", + "bigcode/starcoder2-3b", ] diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index e4f3a785cd99a..75c2ae1e9f48e 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -45,6 +45,7 @@ "RWForCausalLM": ("falcon", "FalconForCausalLM"), "StableLMEpochForCausalLM": ("stablelm", "StablelmForCausalLM"), "StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"), + "Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"), } # Models not supported by ROCm. diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py new file mode 100644 index 0000000000000..1eda07b724cae --- /dev/null +++ b/vllm/model_executor/models/starcoder2.py @@ -0,0 +1,310 @@ +# coding=utf-8 +# Copyright 2024 BigCode and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" PyTorch Starcoder2 model.""" +from typing import List, Optional, Tuple + +import torch +from torch import nn + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.activation import get_act_fn +from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.linear import (ColumnParallelLinear, + LinearMethodBase, + QKVParallelLinear, + RowParallelLinear) +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) +from vllm.model_executor.parallel_utils.parallel_state import get_tensor_model_parallel_world_size +from vllm.model_executor.weight_utils import (default_weight_loader, + hf_model_weights_iterator) +from vllm.sequence import SamplerOutput + +try: + from transformers import Starcoder2Config +except ImportError: + # fallback to PretrainedConfig + # NOTE: Please install transformers from source or use transformers>=4.39.0 + from transformers import PretrainedConfig as Starcoder2Config + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class Starcoder2Attention(nn.Module): + + def __init__(self, + config: Starcoder2Config, + linear_method: Optional[LinearMethodBase] = None): + super().__init__() + self.config = config + + self.hidden_size = config.hidden_size + tp_size = get_tensor_model_parallel_world_size() + self.total_num_heads = config.num_attention_heads + assert self.total_num_heads % tp_size == 0 + self.num_heads = self.total_num_heads // tp_size + self.total_num_kv_heads = config.num_key_value_heads + if self.total_num_kv_heads >= tp_size: + # Number of KV heads is greater than TP size, so we partition + # the KV heads across multiple tensor parallel GPUs. + assert self.total_num_kv_heads % tp_size == 0 + else: + # Number of KV heads is less than TP size, so we replicate + # the KV heads across multiple tensor parallel GPUs. 
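+            # (e.g. 2 KV heads with tp_size=8: each rank keeps a single replicated
+            #  KV head, so num_kv_heads below bottoms out at 1 via max(1, ...).)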
+ assert tp_size % self.total_num_kv_heads == 0 + self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) + self.head_dim = self.hidden_size // self.total_num_heads + self.q_size = self.num_heads * self.head_dim + self.kv_size = self.num_kv_heads * self.head_dim + self.scaling = self.head_dim**-0.5 + self.rope_theta = config.rope_theta + self.max_position_embeddings = config.max_position_embeddings + self.use_bias = config.use_bias + self.sliding_window = config.sliding_window + + self.qkv_proj = QKVParallelLinear( + self.hidden_size, + self.head_dim, + self.total_num_heads, + self.total_num_kv_heads, + bias=self.use_bias, + linear_method=linear_method, + ) + self.o_proj = RowParallelLinear( + self.total_num_heads * self.head_dim, + self.hidden_size, + bias=self.use_bias, + linear_method=linear_method, + ) + self.rotary_emb = get_rope( + self.head_dim, + rotary_dim=self.head_dim, + max_position=self.max_position_embeddings, + base=int(self.rope_theta), + is_neox_style=True, + ) + self.attn = PagedAttention( + self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads, + sliding_window=self.sliding_window, + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + ) -> torch.Tensor: + qkv, _ = self.qkv_proj(hidden_states) + q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) + q, k = self.rotary_emb(positions, q, k) + k_cache, v_cache = kv_cache + attn_output = self.attn(q, k, v, k_cache, v_cache, input_metadata) + output, _ = self.o_proj(attn_output) + return output + + +class Starcoder2MLP(nn.Module): + + def __init__(self, + config: Starcoder2Config, + linear_method: Optional[LinearMethodBase] = None): + super().__init__() + self.c_fc = ColumnParallelLinear( + config.hidden_size, + config.intermediate_size, + bias=config.use_bias, + linear_method=linear_method, + ) + self.c_proj = RowParallelLinear( + config.intermediate_size, + config.hidden_size, + bias=config.use_bias, + linear_method=linear_method, + ) + self.act = get_act_fn(config.hidden_act, + intermediate_size=config.intermediate_size) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states, _ = self.c_fc(hidden_states) + hidden_states = self.act(hidden_states) + hidden_states, _ = self.c_proj(hidden_states) + return hidden_states + + +class Starcoder2DecoderLayer(nn.Module): + + def __init__(self, + config: Starcoder2Config, + linear_method: Optional[LinearMethodBase] = None): + super().__init__() + self.hidden_size = config.hidden_size + self.self_attn = Starcoder2Attention(config, + linear_method=linear_method) + self.mlp = Starcoder2MLP(config, linear_method=linear_method) + self.input_layernorm = nn.LayerNorm(config.hidden_size, + eps=config.norm_epsilon) + self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, + eps=config.norm_epsilon) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + ) -> torch.Tensor: + # Self Attention + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + hidden_states = self.self_attn( + positions=positions, + hidden_states=hidden_states, + kv_cache=kv_cache, + input_metadata=input_metadata, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + 
hidden_states + + return hidden_states + + +class Starcoder2Model(nn.Module): + + def __init__(self, + config: Starcoder2Config, + linear_method: Optional[LinearMethodBase] = None): + super().__init__() + self.config = config + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + + # TODO: consider padding_idx (currently removed) + self.embed_tokens = VocabParallelEmbedding(config.vocab_size, + config.hidden_size) + self.layers = nn.ModuleList([ + Starcoder2DecoderLayer(config, linear_method=linear_method) + for _ in range(config.num_hidden_layers) + ]) + self.norm = nn.LayerNorm(config.hidden_size, eps=config.norm_epsilon) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + hidden_states = self.embed_tokens(input_ids) + for i in range(len(self.layers)): + layer = self.layers[i] + hidden_states = layer(positions, hidden_states, kv_caches[i], + input_metadata) + hidden_states = self.norm(hidden_states) + return hidden_states + + +class Starcoder2ForCausalLM(nn.Module): + + def __init__(self, + config: Starcoder2Config, + linear_method: Optional[LinearMethodBase] = None): + super().__init__() + self.config = config + self.model = Starcoder2Model(config, linear_method=linear_method) + self.vocab_size = config.vocab_size + self.unpadded_vocab_size = config.vocab_size + if config.tie_word_embeddings: + self.lm_head_weight = self.model.embed_tokens.weight + else: + self.unpadded_vocab_size = config.vocab_size + self.lm_head = ParallelLMHead( + self.unpadded_vocab_size, + config.hidden_size, + org_num_embeddings=config.vocab_size, + padding_size=DEFAULT_VOCAB_PADDING_SIZE, + ) + self.lm_head_weight = self.lm_head.weight + self.sampler = Sampler(self.unpadded_vocab_size, config.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + hidden_states = self.model(input_ids, positions, kv_caches, + input_metadata) + return hidden_states + + def sample( + self, + hidden_states: Optional[torch.Tensor], + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(self.lm_head_weight, hidden_states, + sampling_metadata) + return next_tokens + + def load_weights(self, + model_name_or_path: str, + cache_dir: Optional[str] = None, + load_format: str = "auto", + revision: Optional[str] = None): + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("qkv_proj", "q_proj", "q"), + ("qkv_proj", "k_proj", "k"), + ("qkv_proj", "v_proj", "v"), + ] + + params_dict = dict(self.named_parameters(remove_duplicate=False)) + for name, loaded_weight in hf_model_weights_iterator( + model_name_or_path, cache_dir, load_format, revision): + if "rotary_emb.inv_freq" in name: + continue + + for (param_name, weight_name, shard_id) in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + if self.config.tie_word_embeddings and "lm_head.weight" in name: + continue + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 6b0413f440a0e..5e1f0439aec51 100644 --- 
a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -9,6 +9,7 @@ "mpt": MPTConfig, "RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct) "RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct) + "starcoder2": Starcoder2Config, } @@ -16,6 +17,15 @@ def get_config(model: str, trust_remote_code: bool, revision: Optional[str] = None, code_revision: Optional[str] = None) -> PretrainedConfig: + # FIXME(woosuk): This is a temporary fix for StarCoder2. + # Remove this when the model is supported by HuggingFace transformers. + if "bigcode" in model and "starcoder2" in model: + config_class = _CONFIG_REGISTRY["starcoder2"] + config = config_class.from_pretrained(model, + revision=revision, + code_revision=code_revision) + return config + try: config = AutoConfig.from_pretrained( model, diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index ef955f75cedaa..4966526f15184 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -4,9 +4,11 @@ # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the # `FalconConfig` class from the official HuggingFace transformers library. from vllm.transformers_utils.configs.falcon import RWConfig +from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config __all__ = [ "ChatGLMConfig", "MPTConfig", "RWConfig", + "Starcoder2Config", ] diff --git a/vllm/transformers_utils/configs/starcoder2.py b/vllm/transformers_utils/configs/starcoder2.py new file mode 100644 index 0000000000000..4c3b6b8def074 --- /dev/null +++ b/vllm/transformers_utils/configs/starcoder2.py @@ -0,0 +1,127 @@ +from transformers import PretrainedConfig + + +class Starcoder2Config(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a [`Starcoder2Model`]. It is used to instantiate a + Starcoder2 model according to the specified arguments, defining the model architecture. Instantiating a configuration + with the defaults will yield a similar configuration to that of the [bigcode/starcoder2-7b_16k](https://huggingface.co/bigcode/starcoder2-7b_16k) model. + + + Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the + documentation from [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 49152): + Vocabulary size of the Starcoder2 model. Defines the number of different tokens that can be represented by the + `inputs_ids` passed when calling [`Starcoder2Model`] + hidden_size (`int`, *optional*, defaults to 3072): + Dimension of the hidden representations. + intermediate_size (`int`, *optional*, defaults to 12288): + Dimension of the MLP representations. + num_hidden_layers (`int`, *optional*, defaults to 30): + Number of hidden layers in the Transformer encoder. + num_attention_heads (`int`, *optional*, defaults to 24): + Number of attention heads for each attention layer in the Transformer encoder. + num_key_value_heads (`int`, *optional*, defaults to 2): + This is the number of key_value heads that should be used to implement Grouped Query Attention. If + `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if + `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. 
When + converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed + by meanpooling all the original heads within that group. For more details checkout [this + paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`. + hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`): + The non-linear activation function (function or string) in the decoder. + max_position_embeddings (`int`, *optional*, defaults to 4096): + The maximum sequence length that this model might ever be used with. Starcoder2's sliding window attention + allows sequence of up to 4096*32 tokens. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for initializing all weight matrices. + norm_epsilon (`float`, *optional*, defaults to 1e-05): + Epsilon value for the layer norm + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values attentions (not used by all models). Only + relevant if `config.is_decoder=True`. + bos_token_id (`int`, *optional*, defaults to 50256): + The id of the "beginning-of-sequence" token. + eos_token_id (`int`, *optional*, defaults to 50256): + The id of the "end-of-sequence" token. + rope_theta (`float`, *optional*, defaults to 10000.0): + The base period of the RoPE embeddings. + sliding_window (`int`, *optional*): + Sliding window attention window size. If not specified, will default to `None` (no sliding window). + attention_dropout (`float`, *optional*, defaults to 0.0): + The dropout ratio for the attention probabilities. + residual_dropout (`float`, *optional*, defaults to 0.0): + Residual connection dropout value. + embedding_dropout (`float`, *optional*, defaults to 0.0): + Embedding dropout. + use_bias (`bool`, *optional*, defaults to `True`): + Whether to use bias term on linear layers of the model. 
+ + + ```python + >>> from transformers import Starcoder2Model, Starcoder2Config + + >>> # Initializing a Starcoder2 7B style configuration + >>> configuration = Starcoder2Config() + + >>> # Initializing a model from the Starcoder2 7B style configuration + >>> model = Starcoder2Model(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "starcoder2" + keys_to_ignore_at_inference = ["past_key_values"] + + def __init__( + self, + vocab_size=49152, + hidden_size=3072, + intermediate_size=12288, + num_hidden_layers=30, + num_attention_heads=24, + num_key_value_heads=2, + hidden_act="gelu_pytorch_tanh", + max_position_embeddings=4096, + initializer_range=0.018042, + norm_epsilon=1e-5, + use_cache=True, + bos_token_id=50256, + eos_token_id=50256, + rope_theta=10000.0, + sliding_window=None, + attention_dropout=0.0, + residual_dropout=0.0, + embedding_dropout=0.0, + use_bias=True, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.sliding_window = sliding_window + self.use_bias = use_bias + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.norm_epsilon = norm_epsilon + self.use_cache = use_cache + self.rope_theta = rope_theta + self.attention_dropout = attention_dropout + self.residual_dropout = residual_dropout + self.embedding_dropout = embedding_dropout + + super().__init__( + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + **kwargs, + ) + if self.architectures is None: + self.architectures = ['Starcoder2ForCausalLM'] From 2c08ff23c07f2f8d51da8e1783c5346dccc1fd12 Mon Sep 17 00:00:00 2001 From: Billy Cao Date: Fri, 1 Mar 2024 03:13:58 +0800 Subject: [PATCH 033/196] Fix building from source on WSL (#3112) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 16978d74e0425..1f48be948aa84 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ def _is_neuron() -> bool: torch_neuronx_installed = True try: subprocess.run(["neuron-ls"], capture_output=True, check=True) - except FileNotFoundError: + except (FileNotFoundError, PermissionError): torch_neuronx_installed = False return torch_neuronx_installed From 29a8d6a554a87292f05b62078976b43a899691e3 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 29 Feb 2024 11:20:42 -0800 Subject: [PATCH 034/196] [Fix] Don't deep-copy LogitsProcessors when copying SamplingParams (#3099) --- vllm/engine/llm_engine.py | 5 +++-- vllm/sampling_params.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 6f5af71426d78..9bf19b932d35b 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -484,8 +484,9 @@ def add_request( prompt_token_ids[:prefix_pos], lora_request.lora_int_id if lora_request else 0) if prefix_pos is not None else None - # Defensive copy of SamplingParams, which are used by the sampler - sampling_params = copy.deepcopy(sampling_params) + # Defensive copy of SamplingParams, which are used by the sampler, + # this doesn't deep-copy LogitsProcessor objects + sampling_params = sampling_params.clone() # Create the sequence group. 
seq_group = SequenceGroup(request_id, [seq], sampling_params, diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 51d39220ca9ca..8103f3c2b24bf 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -1,4 +1,5 @@ """Sampling parameters for text generation.""" +import copy from enum import IntEnum from functools import cached_property from typing import Callable, List, Optional, Union @@ -237,6 +238,20 @@ def sampling_type(self) -> SamplingType: return SamplingType.RANDOM_SEED return SamplingType.RANDOM + def clone(self) -> "SamplingParams": + """Deep copy excluding LogitsProcessor objects. + + LogitsProcessor objects are excluded because they may contain an + arbitrary, nontrivial amount of data. + See https://github.com/vllm-project/vllm/issues/3087 + """ + + logit_processor_refs = None if self.logits_processors is None else { + id(lp): lp + for lp in self.logits_processors + } + return copy.deepcopy(self, memo=logit_processor_refs) + def __repr__(self) -> str: return ( f"SamplingParams(n={self.n}, " From 703e42ee4b3efed3c71e7ae7d15f0f96e05722d4 Mon Sep 17 00:00:00 2001 From: felixzhu555 <79335195+felixzhu555@users.noreply.github.com> Date: Thu, 29 Feb 2024 14:13:08 -0800 Subject: [PATCH 035/196] Add guided decoding for OpenAI API server (#2819) Co-authored-by: br3no Co-authored-by: simon-mo --- requirements.txt | 1 + tests/entrypoints/test_guided_processors.py | 75 ++++++ tests/entrypoints/test_openai_server.py | 237 ++++++++++++++++++ vllm/engine/async_llm_engine.py | 3 + vllm/entrypoints/openai/protocol.py | 36 ++- vllm/entrypoints/openai/serving_chat.py | 9 + vllm/entrypoints/openai/serving_completion.py | 9 + vllm/model_executor/guided_decoding.py | 99 ++++++++ .../guided_logits_processors.py | 129 ++++++++++ 9 files changed, 597 insertions(+), 1 deletion(-) create mode 100644 tests/entrypoints/test_guided_processors.py create mode 100644 vllm/model_executor/guided_decoding.py create mode 100644 vllm/model_executor/guided_logits_processors.py diff --git a/requirements.txt b/requirements.txt index d4599ec95d945..05ec2e804e13b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 pynvml == 11.5.0 triton >= 2.1.0 +outlines >= 0.0.27 cupy-cuda12x == 12.1.0 # Required for CUDA graphs. CUDA 11.8 users should install cupy-cuda11x instead. diff --git a/tests/entrypoints/test_guided_processors.py b/tests/entrypoints/test_guided_processors.py new file mode 100644 index 0000000000000..5b39269916f8b --- /dev/null +++ b/tests/entrypoints/test_guided_processors.py @@ -0,0 +1,75 @@ +# This unit test should be moved to a new +# tests/test_guided_decoding directory. 
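+# It exercises RegexLogitsProcessor/JSONLogitsProcessor directly; end-to-end guided
+# decoding through the OpenAI-compatible server is covered in test_openai_server.py.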
+ +from transformers import AutoTokenizer +import torch + +from vllm.model_executor.guided_logits_processors import (RegexLogitsProcessor, + JSONLogitsProcessor) + +TEST_SCHEMA = { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + }, + "skills": { + "type": "array", + "items": { + "type": "string", + "maxLength": 10 + }, + "minItems": 3 + }, + "work history": { + "type": "array", + "items": { + "type": "object", + "properties": { + "company": { + "type": "string" + }, + "duration": { + "type": "string" + }, + "position": { + "type": "string" + } + }, + "required": ["company", "position"] + } + } + }, + "required": ["name", "age", "skills", "work history"] +} + +TEST_REGEX = r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + \ + r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)" + + +def test_guided_logits_processors(): + """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor.""" + tokenizer = AutoTokenizer.from_pretrained('HuggingFaceH4/zephyr-7b-beta') + regex_LP = RegexLogitsProcessor(TEST_REGEX, tokenizer) + json_LP = JSONLogitsProcessor(TEST_SCHEMA, tokenizer) + + regex_LP.init_state() + token_ids = tokenizer.encode( + f"Give an example IPv4 address with this regex: {TEST_REGEX}") + tensor = torch.rand(32000) + original_tensor = torch.clone(tensor) + regex_LP(token_ids, tensor) + assert tensor.shape == original_tensor.shape + assert not torch.allclose(tensor, original_tensor) + + json_LP.init_state() + token_ids = tokenizer.encode( + f"Give an employee profile that fits this schema: {TEST_SCHEMA}") + tensor = torch.rand(32000) + original_tensor = torch.clone(tensor) + json_LP(token_ids, tensor) + assert tensor.shape == original_tensor.shape + assert not torch.allclose(tensor, original_tensor) diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index 72e2374899793..e426cf7eed72b 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -9,12 +9,64 @@ import openai # use the official client for correctness check from huggingface_hub import snapshot_download # downloading lora to test lora requests +# imports for guided decoding tests +import json +import jsonschema +import re + from vllm.transformers_utils.tokenizer import get_tokenizer MAX_SERVER_START_WAIT_S = 600 # wait for server to start for 60 seconds MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" # any model with a chat template should work here LORA_NAME = "typeof/zephyr-7b-beta-lora" # technically this needs Mistral-7B-v0.1 as base, but we're not testing generation quality here +TEST_SCHEMA = { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + }, + "skills": { + "type": "array", + "items": { + "type": "string", + "maxLength": 10 + }, + "minItems": 3 + }, + "work history": { + "type": "array", + "items": { + "type": "object", + "properties": { + "company": { + "type": "string" + }, + "duration": { + "type": "string" + }, + "position": { + "type": "string" + } + }, + "required": ["company", "position"] + } + } + }, + "required": ["name", "age", "skills", "work history"] +} + +TEST_REGEX = r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + \ + r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)" + +TEST_CHOICE = [ + "Python", "Java", "JavaScript", "C++", "C#", "PHP", "TypeScript", "Ruby", + "Swift", "Kotlin" +] + pytestmark = pytest.mark.asyncio @@ -325,6 +377,7 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI): max_tokens=max_tokens, temperature=0.0, 
logit_bias={str(token_id): 100}, + seed=42, ) assert completion.choices[0].text is not None and len( completion.choices[0].text) >= 5 @@ -358,5 +411,189 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI): assert first_response != completion.choices[0].text +async def test_guided_json_completion(server, client: openai.AsyncOpenAI): + completion = await client.completions.create( + model=MODEL_NAME, + prompt= + f"Give an example JSON for an employee profile that fits this schema: {TEST_SCHEMA}", + n=3, + temperature=1.0, + max_tokens=500, + extra_body=dict(guided_json=TEST_SCHEMA)) + + assert completion.id is not None + assert completion.choices is not None and len(completion.choices) == 3 + for i in range(3): + assert completion.choices[i].text is not None + output_json = json.loads(completion.choices[i].text) + jsonschema.validate(instance=output_json, schema=TEST_SCHEMA) + + +async def test_guided_json_chat(server, client: openai.AsyncOpenAI): + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": "user", + "content": "Give an example JSON for an employee profile that " + \ + f"fits this schema: {TEST_SCHEMA}" + }] + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=500, + extra_body=dict(guided_json=TEST_SCHEMA)) + message = chat_completion.choices[0].message + assert message.content is not None + json1 = json.loads(message.content) + jsonschema.validate(instance=json1, schema=TEST_SCHEMA) + + messages.append({"role": "assistant", "content": message.content}) + messages.append({ + "role": + "user", + "content": + "Give me another one with a different name and age" + }) + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=500, + extra_body=dict(guided_json=TEST_SCHEMA)) + message = chat_completion.choices[0].message + assert message.content is not None + json2 = json.loads(message.content) + jsonschema.validate(instance=json2, schema=TEST_SCHEMA) + assert json1["name"] != json2["name"] + assert json1["age"] != json2["age"] + + +async def test_guided_regex_completion(server, client: openai.AsyncOpenAI): + completion = await client.completions.create( + model=MODEL_NAME, + prompt=f"Give an example IPv4 address with this regex: {TEST_REGEX}", + n=3, + temperature=1.0, + max_tokens=20, + extra_body=dict(guided_regex=TEST_REGEX)) + + assert completion.id is not None + assert completion.choices is not None and len(completion.choices) == 3 + for i in range(3): + assert completion.choices[i].text is not None + assert re.fullmatch(TEST_REGEX, completion.choices[i].text) is not None + + +async def test_guided_regex_chat(server, client: openai.AsyncOpenAI): + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": + "user", + "content": + f"Give an example IP address with this regex: {TEST_REGEX}" + }] + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=20, + extra_body=dict(guided_regex=TEST_REGEX)) + ip1 = chat_completion.choices[0].message.content + assert ip1 is not None + assert re.fullmatch(TEST_REGEX, ip1) is not None + + messages.append({"role": "assistant", "content": ip1}) + messages.append({"role": "user", "content": "Give me a different one"}) + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=20, + extra_body=dict(guided_regex=TEST_REGEX)) + ip2 = 
chat_completion.choices[0].message.content + assert ip2 is not None + assert re.fullmatch(TEST_REGEX, ip2) is not None + assert ip1 != ip2 + + +async def test_guided_choice_completion(server, client: openai.AsyncOpenAI): + completion = await client.completions.create( + model=MODEL_NAME, + prompt="The best language for type-safe systems programming is ", + n=2, + temperature=1.0, + max_tokens=10, + extra_body=dict(guided_choice=TEST_CHOICE)) + + assert completion.id is not None + assert completion.choices is not None and len(completion.choices) == 2 + for i in range(2): + assert completion.choices[i].text in TEST_CHOICE + + +async def test_guided_choice_chat(server, client: openai.AsyncOpenAI): + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": + "user", + "content": + "The best language for type-safe systems programming is " + }] + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=10, + extra_body=dict(guided_choice=TEST_CHOICE)) + choice1 = chat_completion.choices[0].message.content + assert choice1 in TEST_CHOICE + + messages.append({"role": "assistant", "content": choice1}) + messages.append({ + "role": "user", + "content": "I disagree, pick another one" + }) + chat_completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + max_tokens=10, + extra_body=dict(guided_choice=TEST_CHOICE)) + choice2 = chat_completion.choices[0].message.content + assert choice2 in TEST_CHOICE + assert choice1 != choice2 + + +async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI): + with pytest.raises(openai.BadRequestError): + _ = await client.completions.create( + model=MODEL_NAME, + prompt="Give an example JSON that fits this schema: 42", + extra_body=dict(guided_json=42)) + + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": + "user", + "content": + "The best language for type-safe systems programming is " + }] + with pytest.raises(openai.BadRequestError): + _ = await client.chat.completions.create(model=MODEL_NAME, + messages=messages, + extra_body=dict(guided_regex={ + 1: "Python", + 2: "C++" + })) + + with pytest.raises(openai.BadRequestError): + _ = await client.completions.create( + model=MODEL_NAME, + prompt="Give an example string that fits this regex", + extra_body=dict(guided_regex=TEST_REGEX, guided_json=TEST_SCHEMA)) + + if __name__ == "__main__": pytest.main([__file__]) diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 7cba654602779..daa6419cdad3b 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -333,6 +333,9 @@ def is_running(self) -> bool: return (self.background_loop is not None and not self.background_loop.done()) + def get_tokenizer(self): + return self.engine.tokenizer.tokenizer + def start_background_loop(self) -> None: """Start the background loop.""" if self.is_running: diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 97cfd797587c4..26499b8d7a66f 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -3,7 +3,7 @@ import time from typing import Dict, List, Literal, Optional, Union -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from vllm.utils import random_uuid from vllm.sampling_params import SamplingParams @@ -86,6 +86,9 @@ class ChatCompletionRequest(BaseModel): min_p: 
Optional[float] = 0.0 include_stop_str_in_output: Optional[bool] = False length_penalty: Optional[float] = 1.0 + guided_json: Optional[Union[str, dict, BaseModel]] = None + guided_regex: Optional[str] = None + guided_choice: Optional[List[str]] = None def to_sampling_params(self) -> SamplingParams: if self.logprobs and not self.top_logprobs: @@ -131,6 +134,20 @@ def logit_bias_logits_processor( logits_processors=logits_processors, ) + @model_validator(mode="before") + @classmethod + def check_guided_decoding_count(cls, data): + guide_count = sum([ + "guided_json" in data and data["guided_json"] is not None, + "guided_regex" in data and data["guided_regex"] is not None, + "guided_choice" in data and data["guided_choice"] is not None + ]) + if guide_count > 1: + raise ValueError( + "You can only use one kind of guided decoding " + "('guided_json', 'guided_regex' or 'guided_choice').") + return data + class CompletionRequest(BaseModel): model: str @@ -163,6 +180,9 @@ class CompletionRequest(BaseModel): min_p: Optional[float] = 0.0 include_stop_str_in_output: Optional[bool] = False length_penalty: Optional[float] = 1.0 + guided_json: Optional[Union[str, dict, BaseModel]] = None + guided_regex: Optional[str] = None + guided_choice: Optional[List[str]] = None def to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 @@ -207,6 +227,20 @@ def logit_bias_logits_processor( logits_processors=logits_processors, ) + @model_validator(mode="before") + @classmethod + def check_guided_decoding_count(cls, data): + guide_count = sum([ + "guided_json" in data and data["guided_json"] is not None, + "guided_regex" in data and data["guided_regex"] is not None, + "guided_choice" in data and data["guided_choice"] is not None + ]) + if guide_count > 1: + raise ValueError( + "You can only use one kind of guided decoding " + "('guided_json', 'guided_regex' or 'guided_choice').") + return data + class LogProbs(BaseModel): text_offset: List[int] = Field(default_factory=list) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index e5ae39e110a40..f4ad0aa5a0184 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -12,6 +12,7 @@ UsageInfo) from vllm.outputs import RequestOutput from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA +from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor logger = init_logger(__name__) @@ -62,6 +63,14 @@ async def create_chat_completion( prompt=prompt) sampling_params = request.to_sampling_params() lora_request = self._maybe_get_lora(request) + guided_decode_logits_processor = ( + await get_guided_decoding_logits_processor( + request, self.engine.get_tokenizer())) + if guided_decode_logits_processor: + if sampling_params.logits_processors is None: + sampling_params.logits_processors = [] + sampling_params.logits_processors.append( + guided_decode_logits_processor) except ValueError as e: return self.create_error_response(str(e)) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 610f53549da48..713e67793b290 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -16,6 +16,7 @@ ) from vllm.outputs import RequestOutput from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA +from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor logger = init_logger(__name__) @@ 
-286,6 +287,14 @@ async def create_completion(self, request: CompletionRequest, try: sampling_params = request.to_sampling_params() lora_request = self._maybe_get_lora(request) + guided_decode_logit_processor = ( + await get_guided_decoding_logits_processor( + request, self.engine.get_tokenizer())) + if guided_decode_logit_processor is not None: + if sampling_params.logits_processors is None: + sampling_params.logits_processors = [] + sampling_params.logits_processors.append( + guided_decode_logit_processor) prompt_is_tokens, prompts = parse_prompt_format(request.prompt) for i, prompt in enumerate(prompts): diff --git a/vllm/model_executor/guided_decoding.py b/vllm/model_executor/guided_decoding.py new file mode 100644 index 0000000000000..a8573f8bdc6c8 --- /dev/null +++ b/vllm/model_executor/guided_decoding.py @@ -0,0 +1,99 @@ +import asyncio +import concurrent.futures +from copy import copy +from enum import Enum +from functools import lru_cache +from json import dumps as json_dumps +from re import escape as regex_escape +from typing import Union, Tuple +from pydantic import BaseModel + +from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest +from vllm.model_executor.guided_logits_processors import JSONLogitsProcessor, RegexLogitsProcessor + + +class GuidedDecodingMode(Enum): + JSON = "json" + REGEX = "regex" + CHOICE = "choice" + + +global_thread_pool = None # used for generating logits processor fsm + + +async def get_guided_decoding_logits_processor( + request: Union[CompletionRequest, ChatCompletionRequest], + tokenizer) -> Union[JSONLogitsProcessor, RegexLogitsProcessor]: + """ + Given an OpenAI-compatible request, check for guided decoding parameters + and get the necessary logits processor for the given guide. + We cache logit processors by (guide, tokenizer), and on cache hit + we make a shallow copy to reuse the same underlying FSM. 
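A hedged, standalone distillation of the cache-then-copy pattern the docstring above describes, under the assumption that callers reuse one tokenizer object: FSM compilation is memoized with `functools.lru_cache`, and each request receives a cheap shallow copy whose per-sequence state is reset with `init_state()`.

```python
from copy import copy
from functools import lru_cache

from transformers import AutoTokenizer
from vllm.model_executor.guided_logits_processors import RegexLogitsProcessor


@lru_cache(maxsize=32)
def _cached_regex_processor(regex: str, tokenizer):
    # Compiling the FSM is the expensive step; do it once per (regex, tokenizer).
    return RegexLogitsProcessor(regex, tokenizer)


def processor_for_request(regex: str, tokenizer):
    per_request = copy(_cached_regex_processor(regex, tokenizer))
    per_request.init_state()  # fresh per-sequence state, shared compiled FSM
    return per_request


tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
lp1 = processor_for_request(r"\d{3}", tokenizer)
lp2 = processor_for_request(r"\d{3}", tokenizer)
assert lp1.fsm is lp2.fsm  # the FSM was compiled once and is shared
assert lp1 is not lp2      # but each request tracks its own state
```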
+ """ + global global_thread_pool + guide, mode = _get_guide_and_mode(request) + if not guide: + return None + + if global_thread_pool is None: + global_thread_pool = concurrent.futures.ThreadPoolExecutor( + max_workers=2) + loop = asyncio.get_running_loop() + + result = await loop.run_in_executor(global_thread_pool, + _get_cached_logits_processor, guide, + tokenizer, mode) + + logits_processor = copy(result) + # reset logits processor's internal state + logits_processor.init_state() + return logits_processor + + +def _get_guide_and_mode( + request: Union[CompletionRequest, ChatCompletionRequest] +) -> Tuple[str, GuidedDecodingMode]: + + if request.guided_json: + if not isinstance(request.guided_json, (str, dict, BaseModel)): + raise TypeError("JSON schema must be str, dict, or BaseModel") + + json = request.guided_json + if isinstance(json, dict): + # turn dict into hashable string + json = json_dumps(json, sort_keys=True) + elif isinstance(json, BaseModel): + # use pydantic signature so that different model classes + # with the same fields will get hashed the same + json = str(json.__signature__) + return json, GuidedDecodingMode.JSON + + elif request.guided_regex: + if not isinstance(request.guided_regex, str): + raise TypeError("Regex must be string") + return request.guided_regex, GuidedDecodingMode.REGEX + + elif request.guided_choice: + if not isinstance(request.guided_choice, list): + raise TypeError("Choices must be a list") + + # choice just uses regex + choices = [ + regex_escape(str(choice)) for choice in request.guided_choice + ] + choices_regex = "(" + "|".join(choices) + ")" + return choices_regex, GuidedDecodingMode.CHOICE + + else: + return None, None + + +@lru_cache(maxsize=32) +def _get_cached_logits_processor(guide: str, tokenizer, + mode: GuidedDecodingMode): + if mode == GuidedDecodingMode.JSON: + return JSONLogitsProcessor(guide, tokenizer) + elif mode == GuidedDecodingMode.REGEX or mode == GuidedDecodingMode.CHOICE: + return RegexLogitsProcessor(guide, tokenizer) + else: + raise ValueError(f"Unknown guided decoding mode {mode}") diff --git a/vllm/model_executor/guided_logits_processors.py b/vllm/model_executor/guided_logits_processors.py new file mode 100644 index 0000000000000..1b3e5e71a5911 --- /dev/null +++ b/vllm/model_executor/guided_logits_processors.py @@ -0,0 +1,129 @@ +# Copyright 2024- the Outlines developers +# This file is adapted from +# https://github.com/outlines-dev/outlines/blob/main/outlines/serve/vllm.py +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import math +from collections import defaultdict +from typing import Union, DefaultDict, Dict, List, Optional + +import torch +from pydantic import BaseModel +from outlines.fsm.fsm import RegexFSM +from outlines.fsm.json_schema import build_regex_from_schema + + +class RegexLogitsProcessor: + + def __init__(self, regex_string: str, tokenizer): + """Compile the FSM that drives the regex-structured generation. 
+ + Parameters + ---------- + regex_string + A string that represents a regular expression + tokenizer + The model's tokenizer + + """ + tokenizer = self.adapt_tokenizer(tokenizer) + fsm = RegexFSM(regex_string, tokenizer) + self.fsm = fsm + + def init_state(self): + """Initialize the FSM states.""" + self.fsm_state: DefaultDict[int, int] = defaultdict(int) + + def __call__(self, input_ids: List[int], + scores: torch.Tensor) -> torch.Tensor: + """Use the FSM to bias the logits before sampling the next token.""" + + seq_id = hash(tuple(input_ids)) + + if len(input_ids) == 0: + self.init_state() + else: + last_token = input_ids[-1] + last_seq_id = hash(tuple(input_ids[:-1])) + self.fsm_state[seq_id] = self.fsm.next_state( + self.fsm_state[last_seq_id], last_token) + + allowed_tokens = self.fsm.allowed_token_ids(self.fsm_state[seq_id]) + + mask = torch.full((scores.shape[-1], ), + -math.inf, + device=scores.device) + mask[allowed_tokens] = 0 + scores.add_(mask) + + return scores + + def adapt_tokenizer(self, tokenizer): + """Adapt vLLM's tokenizer to use to compile the FSM. + + The API of Outlines tokenizers is slightly different to that of + `transformers`. In addition we need to handle the missing spaces to + Llama's tokenizer to be able to compile FSMs for this model. + + """ + tokenizer.vocabulary = tokenizer.get_vocab() + tokenizer.special_tokens = set(tokenizer.all_special_tokens) + + def convert_token_to_string(token: str) -> str: + from transformers.file_utils import SPIECE_UNDERLINE + + string = tokenizer.convert_tokens_to_string([token]) + + # A hack to handle missing spaces to HF's Llama tokenizers + if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>": + return " " + string + + return string + + tokenizer.convert_token_to_string = convert_token_to_string + + return tokenizer + + +class JSONLogitsProcessor(RegexLogitsProcessor): + + def __init__(self, + schema: Union[str, Dict, BaseModel], + tokenizer, + whitespace_pattern: Optional[str] = None): + """Compile the FSM that drives the JSON-guided generation. + + Parameters + ---------- + schema + A JSON schema that encodes the structure we want the model to generate + tokenizer + The model's tokenizer + whitespace_pattern + Pattern to use for JSON syntactic whitespace (doesn't impact string literals) + Example: allow only a single space or newline with `whitespace_pattern=r"[\n ]?"` + """ + if isinstance(schema, type(BaseModel)): + schema_str = json.dumps(schema.model_json_schema()) + elif isinstance(schema, Dict): + schema_str = json.dumps(schema) + elif isinstance(schema, str): + schema_str = schema + else: + raise ValueError( + f"Cannot parse schema {schema}. 
The schema must be either " + + "a Pydantic object, a dictionary or a string that contains the JSON " + + "Schema specification") + regex_string = build_regex_from_schema(schema_str, whitespace_pattern) + super().__init__(regex_string, tokenizer) From 54d3544784ff20e7038abf72793eaf734e727269 Mon Sep 17 00:00:00 2001 From: Sherry <503147114@qq.com> Date: Fri, 1 Mar 2024 15:52:22 +0800 Subject: [PATCH 036/196] Fix: Output text is always truncated in some models (#3016) --- vllm/engine/llm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 9bf19b932d35b..df4858a696530 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -980,7 +980,10 @@ def _check_stop(self, seq: Sequence, def _finalize_sequence(self, seq: Sequence, sampling_params: SamplingParams, stop_string: str) -> None: - if not sampling_params.include_stop_str_in_output and stop_string: + if sampling_params.include_stop_str_in_output: + return + + if stop_string and seq.output_text.endswith(stop_string): # Truncate the output text so that the stop string is # not included in the output. seq.output_text = seq.output_text[:-len(stop_string)] From 27ca23dc002e06eade014ac6b801dc2dcbea40f3 Mon Sep 17 00:00:00 2001 From: Seonghyeon Date: Sat, 2 Mar 2024 02:59:06 +0900 Subject: [PATCH 037/196] Remove exclude_unset in streaming response (#3143) --- vllm/entrypoints/openai/serving_completion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 713e67793b290..86b753fa06ab5 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -96,7 +96,7 @@ async def completion_stream_generator( logprobs=logprobs, finish_reason=finish_reason, ) - ]).model_dump_json(exclude_unset=True) + ]).model_dump_json() yield f"data: {response_json}\n\n" if output.finish_reason is not None: # return final usage @@ -121,7 +121,7 @@ async def completion_stream_generator( ) ], usage=final_usage, - ).model_dump_json(exclude_unset=True) + ).model_dump_json() yield f"data: {response_json}\n\n" yield "data: [DONE]\n\n" @@ -306,7 +306,7 @@ async def create_completion(self, request: CompletionRequest, request, prompt=prompt) generators.append( - self.engine.generate(None, + self.engine.generate(prompt, sampling_params, f"{request_id}-{i}", prompt_token_ids=input_ids, From 49d849b3ab7aa6ae493ccde1d85d226833f73fbb Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 1 Mar 2024 14:04:14 -0500 Subject: [PATCH 038/196] docs: Add tutorial on deploying vLLM model with KServe (#2586) Signed-off-by: Yuan Tang --- docs/source/index.rst | 1 + docs/source/serving/deploying_with_kserve.rst | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 docs/source/serving/deploying_with_kserve.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index 32929257661ad..bdc541cb2d58e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -70,6 +70,7 @@ Documentation serving/distributed_serving serving/run_on_sky + serving/deploying_with_kserve serving/deploying_with_triton serving/deploying_with_docker serving/serving_with_langchain diff --git a/docs/source/serving/deploying_with_kserve.rst b/docs/source/serving/deploying_with_kserve.rst new file mode 100644 index 0000000000000..7f22766e09aef --- /dev/null +++ b/docs/source/serving/deploying_with_kserve.rst @@ -0,0 +1,8 @@ +.. 
_deploying_with_kserve: + +Deploying with KServe +============================ + +vLLM can be deployed with `KServe `_ on Kubernetes for highly scalable distributed model serving. + +Please see `this guide `_ for more details on using vLLM with KServe. From 90fbf12540da089fcc7dc825ce2ceb7ea3a3df33 Mon Sep 17 00:00:00 2001 From: Huarong Date: Sat, 2 Mar 2024 03:42:06 +0800 Subject: [PATCH 039/196] fix relative import path of protocol.py (#3134) Co-authored-by: huohuarong --- vllm/entrypoints/openai/serving_completion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 86b753fa06ab5..99a10196b5f73 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -5,7 +5,7 @@ from vllm.logger import init_logger from vllm.utils import random_uuid from vllm.engine.async_llm_engine import AsyncLLMEngine -from .protocol import ( +from vllm.entrypoints.openai.protocol import ( CompletionRequest, CompletionResponse, CompletionResponseChoice, From c0c2335ce027486d254c31f665ce00d7db427d22 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Fri, 1 Mar 2024 14:47:51 -0600 Subject: [PATCH 040/196] Integrate Marlin Kernels for Int4 GPTQ inference (#2497) Co-authored-by: Robert Shaw <114415538+rib-2@users.noreply.github.com> Co-authored-by: alexm --- csrc/ops.h | 9 + csrc/pybind.cpp | 4 +- csrc/quantization/marlin/LICENSE | 209 +++ .../quantization/marlin/marlin_cuda_kernel.cu | 1145 +++++++++++++++++ requirements-dev.txt | 1 + setup.py | 2 + tests/conftest.py | 32 + tests/models/test_marlin.py | 97 ++ vllm/config.py | 18 +- vllm/model_executor/layers/linear.py | 29 + .../layers/quantization/__init__.py | 2 + .../layers/quantization/marlin.py | 210 +++ 12 files changed, 1752 insertions(+), 6 deletions(-) create mode 100644 csrc/quantization/marlin/LICENSE create mode 100644 csrc/quantization/marlin/marlin_cuda_kernel.cu create mode 100644 tests/models/test_marlin.py create mode 100644 vllm/model_executor/layers/quantization/marlin.py diff --git a/csrc/ops.h b/csrc/ops.h index 08dfb0e8604f1..249c7451bf73c 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -84,6 +84,15 @@ torch::Tensor awq_dequantize( int split_k_iters, int thx, int thy); + +torch::Tensor marlin_gemm( + torch::Tensor& a, + torch::Tensor& b_q_weight, + torch::Tensor& b_scales, + torch::Tensor& workspace, + int64_t size_m, + int64_t size_n, + int64_t size_k); #endif void squeezellm_gemm( diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 5d062bb5700bc..4b6ade7566398 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -52,11 +52,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { &rotary_embedding, "Apply GPT-NeoX or GPT-J style rotary embedding to query and key"); - // Quantization ops +// Quantization ops #ifndef USE_ROCM ops.def("awq_gemm", &awq_gemm, "Quantized GEMM for AWQ"); + ops.def("marlin_gemm", &marlin_gemm, "Marlin Optimized Quantized GEMM for GPTQ"); ops.def("awq_dequantize", &awq_dequantize, "Dequantization for AWQ"); #endif + ops.def("gptq_gemm", &gptq_gemm, "Quantized GEMM for GPTQ"); ops.def("gptq_shuffle", &gptq_shuffle, "Post processing for GPTQ"); ops.def("squeezellm_gemm", &squeezellm_gemm, "Quantized GEMM for SqueezeLLM"); diff --git a/csrc/quantization/marlin/LICENSE b/csrc/quantization/marlin/LICENSE new file mode 100644 index 0000000000000..1d1e4cf9c8233 --- /dev/null +++ 
b/csrc/quantization/marlin/LICENSE @@ -0,0 +1,209 @@ +Contains code from https://github.com/IST-DASLab/marlin + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +------------------------------------------------------------------------------------ + +This product bundles various third-party components under other open source licenses. +This section summarizes those components and their licenses. See licenses/ +for text of these licenses. diff --git a/csrc/quantization/marlin/marlin_cuda_kernel.cu b/csrc/quantization/marlin/marlin_cuda_kernel.cu new file mode 100644 index 0000000000000..cf1b0afdec8b4 --- /dev/null +++ b/csrc/quantization/marlin/marlin_cuda_kernel.cu @@ -0,0 +1,1145 @@ +/* + * Modified by Neural Magic + * Copyright (C) Marlin.2024 Elias Frantar + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include + +template inline std::string str(T x) { return std::to_string(x); } + +namespace marlin { + +constexpr int ceildiv(int a, int b) { return (a + b - 1) / b; } + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 + +// Instances of `Vec` are used to organize groups of >>registers<<, as needed +// for instance as inputs to tensor core operations. Consequently, all +// corresponding index accesses must be compile-time constants, which is why we +// extensively use `#pragma unroll` throughout the kernel code to guarantee +// this. +template struct Vec { + T elems[n]; + __device__ T &operator[](int i) { return elems[i]; } +}; + +using I4 = Vec; + +// Matrix fragments for tensor core instructions; their precise layout is +// documented here: +// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-fragments-for-mma-m16n8k16-with-floating-point-type +using FragA = Vec; +using FragB = Vec; +using FragC = Vec; +using FragS = Vec; // quantization scales + +// Predicated asynchronous global->shared copy; used for inputs A where we apply +// predication to handle batchsizes that are not multiples of 16. +__device__ inline void cp_async4_pred(void *smem_ptr, const void *glob_ptr, + bool pred = true) { + const int BYTES = 16; + uint32_t smem = static_cast(__cvta_generic_to_shared(smem_ptr)); + asm volatile("{\n" + " .reg .pred p;\n" + " setp.ne.b32 p, %0, 0;\n" + " @p cp.async.cg.shared.global [%1], [%2], %3;\n" + "}\n" ::"r"((int)pred), + "r"(smem), "l"(glob_ptr), "n"(BYTES)); +} + +// Asynchronous global->shared copy with a cache hint indicating that the values +// may be evicted immediately; used for quantized weights B, which are only +// accessed precisely once and should thus not pollute the L2 cache which we +// need for inputs A and outputs C. 
+__device__ inline void cp_async4_stream(void *smem_ptr, const void *glob_ptr) { + const int BYTES = 16; + uint32_t smem = static_cast(__cvta_generic_to_shared(smem_ptr)); + asm volatile( + "{\n" + " .reg .b64 p;\n" + " createpolicy.fractional.L2::evict_first.b64 p, 1.0;" + " cp.async.cg.shared.global.L2::cache_hint [%0], [%1], %2, p;\n" + "}\n" ::"r"(smem), + "l"(glob_ptr), "n"(BYTES)); +} + +// Async copy fence. +__device__ inline void cp_async_fence() { + asm volatile("cp.async.commit_group;\n" ::); +} + +// Wait until at most `n` async copy stages are still pending. +template __device__ inline void cp_async_wait() { + asm volatile("cp.async.wait_group %0;\n" ::"n"(n)); +} + +// m16n8k16 tensor core mma instruction with fp16 inputs and fp32 +// output/accumulation. +__device__ inline void mma(const FragA &a_frag, const FragB &frag_b, + FragC &frag_c) { + const uint32_t *a = reinterpret_cast(&a_frag); + const uint32_t *b = reinterpret_cast(&frag_b); + float *c = reinterpret_cast(&frag_c); + asm volatile("mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 " + "{%0,%1,%2,%3}, {%4,%5,%6,%7}, {%8,%9}, {%10,%11,%12,%13};\n" + : "=f"(c[0]), "=f"(c[1]), "=f"(c[2]), "=f"(c[3]) + : "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(b[0]), + "r"(b[1]), "f"(c[0]), "f"(c[1]), "f"(c[2]), "f"(c[3])); +} + +// Instruction for loading a full 16x16 matrix fragment of operand A from shared +// memory, directly in tensor core layout. +__device__ inline void ldsm4(FragA &frag_a, const void *smem_ptr) { + uint32_t *a = reinterpret_cast(&frag_a); + uint32_t smem = static_cast(__cvta_generic_to_shared(smem_ptr)); + asm volatile("ldmatrix.sync.aligned.m8n8.x4.shared.b16 {%0,%1,%2,%3}, [%4];\n" + : "=r"(a[0]), "=r"(a[1]), "=r"(a[2]), "=r"(a[3]) + : "r"(smem)); +} + +// Lookup-table based 3-input logical operation; explicitly used for +// dequantization as the compiler does not seem to automatically recognize it in +// all cases. +template __device__ inline int lop3(int a, int b, int c) { + int res; + asm volatile("lop3.b32 %0, %1, %2, %3, %4;\n" + : "=r"(res) + : "r"(a), "r"(b), "r"(c), "n"(lut)); + return res; +} + +// Efficiently dequantize an int32 value into a full B-fragment of 4 fp16 +// values. We mostly follow the strategy in the link below, with some small +// changes: +// https://github.com/NVIDIA/FasterTransformer/blob/main/src/fastertransformer/cutlass_extensions/include/cutlass_extensions/interleaved_numeric_conversion.h +__device__ inline FragB dequant(int q) { + const int LO = 0x000f000f; + const int HI = 0x00f000f0; + const int EX = 0x64006400; + // Guarantee that the `(a & b) | c` operations are LOP3s. + int lo = lop3<(0xf0 & 0xcc) | 0xaa>(q, LO, EX); + int hi = lop3<(0xf0 & 0xcc) | 0xaa>(q, HI, EX); + // We want signed int4 outputs, hence we fuse the `-8` symmetric zero point + // directly into `SUB` and `ADD`. + const int SUB = 0x64086408; + const int MUL = 0x2c002c00; + const int ADD = 0xd480d480; + FragB frag_b; + frag_b[0] = __hsub2(*reinterpret_cast(&lo), + *reinterpret_cast(&SUB)); + frag_b[1] = __hfma2(*reinterpret_cast(&hi), + *reinterpret_cast(&MUL), + *reinterpret_cast(&ADD)); + return frag_b; +} + +// Multiply dequantized values by the corresponding quantization scale; used +// only for grouped quantization. 
+__device__ inline void scale(FragB &frag_b, FragS &frag_s, int i) { + half2 s = __half2half2(reinterpret_cast<__half *>(&frag_s)[i]); + frag_b[0] = __hmul2(frag_b[0], s); + frag_b[1] = __hmul2(frag_b[1], s); +} + +// Wait until barrier reaches `count`, then lock for current threadblock. +__device__ inline void barrier_acquire(int *lock, int count) { + if (threadIdx.x == 0) { + int state = -1; + do + // Guarantee that subsequent writes by this threadblock will be visible + // globally. + asm volatile("ld.global.acquire.gpu.b32 %0, [%1];\n" + : "=r"(state) + : "l"(lock)); + while (state != count); + } + __syncthreads(); +} + +// Release barrier and increment visitation count. +__device__ inline void barrier_release(int *lock, bool reset = false) { + __syncthreads(); + if (threadIdx.x == 0) { + if (reset) { + lock[0] = 0; + return; + } + int val = 1; + // Make sure that all writes since acquiring this barrier are visible + // globally, while releasing the barrier. + asm volatile("fence.acq_rel.gpu;\n"); + asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n" + : + : "l"(lock), "r"(val)); + } +} + +template shared + // fetch pipeline + const int group_blocks = -1 // number of consecutive 16x16 blocks with + // a separate quantization scale + > +__global__ void +Marlin(const int4 *__restrict__ A, // fp16 input matrix of shape mxk + const int4 *__restrict__ B, // 4bit quantized weight matrix of shape kxn + int4 *__restrict__ C, // fp16 output buffer of shape mxn + const int4 + *__restrict__ s, // fp16 quantization scales of shape (k/groupsize)xn + int prob_m, // batch dimension m + int prob_n, // output dimension n + int prob_k, // reduction dimension k + int *locks // extra global storage for barrier synchronization +) { + // Each threadblock processes one "stripe" of the B matrix with (roughly) the + // same size, which might involve multiple column "slices" (of width 16 * + // `thread_n_blocks`). Stripes are defined as shown in the 3x3 matrix 5 SM + // example: + // 0 1 3 + // 0 2 3 + // 1 2 4 + // While this kind of partitioning makes things somewhat more complicated, it + // ensures good utilization of all SMs for many kinds of shape and GPU + // configurations, while requiring as few slow global cross-threadblock + // reductions as possible. + + // For larger GEMMs we run multiple batchsize 64 versions in parallel for a + // better partitioning with less reductions + int parallel = 1; + if (prob_m > 16 * thread_m_blocks) { + parallel = prob_m / (16 * thread_m_blocks); + prob_m = 16 * thread_m_blocks; + } + + int k_tiles = prob_k / 16 / thread_k_blocks; + int n_tiles = prob_n / 16 / thread_n_blocks; + int iters = ceildiv(k_tiles * n_tiles * parallel, gridDim.x); + // Ensure that the number of tiles in each stripe is a multiple of the + // groupsize; this avoids an annoying special case where a stripe starts in + // the middle of group. 
+ if (group_blocks != -1) + iters = (group_blocks / thread_k_blocks) * + ceildiv(iters, (group_blocks / thread_k_blocks)); + + int slice_row = (iters * blockIdx.x) % k_tiles; + int slice_col_par = (iters * blockIdx.x) / k_tiles; + int slice_col = slice_col_par; + int slice_iters; // number of threadblock tiles in the current slice + int slice_count = + 0; // total number of active threadblocks in the current slice + int slice_idx; // index of threadblock in current slice; numbered bottom to + // top + + // We can easily implement parallel problem execution by just remapping + // indices and advancing global pointers + if (slice_col_par >= n_tiles) { + A += (slice_col_par / n_tiles) * 16 * thread_m_blocks * prob_k / 8; + C += (slice_col_par / n_tiles) * 16 * thread_m_blocks * prob_n / 8; + locks += (slice_col_par / n_tiles) * n_tiles; + slice_col = slice_col_par % n_tiles; + } + + // Compute all information about the current slice which is required for + // synchronization. + auto init_slice = [&]() { + slice_iters = + iters * (blockIdx.x + 1) - (k_tiles * slice_col_par + slice_row); + if (slice_iters < 0 || slice_col_par >= n_tiles * parallel) + slice_iters = 0; + if (slice_iters == 0) + return; + if (slice_row + slice_iters > k_tiles) + slice_iters = k_tiles - slice_row; + slice_count = 1; + slice_idx = 0; + int col_first = iters * ceildiv(k_tiles * slice_col_par, iters); + if (col_first <= k_tiles * (slice_col_par + 1)) { + int col_off = col_first - k_tiles * slice_col_par; + slice_count = ceildiv(k_tiles - col_off, iters); + if (col_off > 0) + slice_count++; + int delta_first = iters * blockIdx.x - col_first; + if (delta_first < 0 || (col_off == 0 && delta_first == 0)) + slice_idx = slice_count - 1; + else { + slice_idx = slice_count - 1 - delta_first / iters; + if (col_off > 0) + slice_idx--; + } + } + if (slice_col == n_tiles) { + A += 16 * thread_m_blocks * prob_k / 8; + C += 16 * thread_m_blocks * prob_n / 8; + locks += n_tiles; + slice_col = 0; + } + }; + init_slice(); + + int a_gl_stride = prob_k / 8; // stride of the A matrix in global memory + // We typically use `constexpr` to indicate that this value is a compile-time + // constant + constexpr int a_sh_stride = + 16 * thread_k_blocks / 8; // stride of an A matrix tile in shared memory + constexpr int a_gl_rd_delta_o = + 16 * thread_k_blocks / + 8; // delta between subsequent A tiles in global memory + int a_gl_rd_delta_i = + a_gl_stride * + (threads / a_gl_rd_delta_o); // between subsequent accesses within a tile + constexpr int a_sh_wr_delta = + a_sh_stride * (threads / a_gl_rd_delta_o); // between shared memory writes + constexpr int a_sh_rd_delta_o = + 2 * ((threads / 32) / + (thread_n_blocks / 4)); // between shared memory tile reads + constexpr int a_sh_rd_delta_i = + a_sh_stride * 16; // within a shared memory tile + constexpr int a_sh_stage = + a_sh_stride * (16 * thread_m_blocks); // overall size of a tile + constexpr int a_sh_wr_iters = + ceildiv(a_sh_stage, + a_sh_wr_delta); // number of shared write iterations for a tile + + int b_gl_stride = 16 * prob_n / 32; + constexpr int b_sh_stride = 32 * thread_n_blocks / 4; + int b_gl_rd_delta_o = b_gl_stride * thread_k_blocks; + int b_gl_rd_delta_i = b_gl_stride * (threads / b_sh_stride); + constexpr int b_sh_wr_delta = threads; + constexpr int b_sh_rd_delta = threads; + constexpr int b_sh_stage = b_sh_stride * thread_k_blocks; + constexpr int b_sh_wr_iters = b_sh_stage / b_sh_wr_delta; + + int s_gl_stride = prob_n / 8; + constexpr int s_sh_stride = 16 * thread_n_blocks / 
8; + constexpr int s_sh_stage = s_sh_stride; + int s_gl_rd_delta = s_gl_stride; + + // Global A read index of current thread. + int a_gl_rd = a_gl_stride * (threadIdx.x / a_gl_rd_delta_o) + + (threadIdx.x % a_gl_rd_delta_o); + a_gl_rd += a_gl_rd_delta_o * slice_row; + // Shared write index of current thread. + int a_sh_wr = a_sh_stride * (threadIdx.x / a_gl_rd_delta_o) + + (threadIdx.x % a_gl_rd_delta_o); + // Shared read index. + int a_sh_rd = + a_sh_stride * ((threadIdx.x % 32) % 16) + (threadIdx.x % 32) / 16; + a_sh_rd += 2 * ((threadIdx.x / 32) / (thread_n_blocks / 4)); + + int b_gl_rd = + b_gl_stride * (threadIdx.x / b_sh_stride) + (threadIdx.x % b_sh_stride); + b_gl_rd += b_sh_stride * slice_col; + b_gl_rd += b_gl_rd_delta_o * slice_row; + int b_sh_wr = threadIdx.x; + int b_sh_rd = threadIdx.x; + + int s_gl_rd = s_gl_stride * ((thread_k_blocks * slice_row) / group_blocks) + + s_sh_stride * slice_col + threadIdx.x; + int s_sh_wr = threadIdx.x; + int s_sh_rd; + // We use a different scale layout for grouped and column-wise quantization as + // we scale a `half2` tile in column-major layout in the former and in + // row-major in the latter case. + if (group_blocks != -1) + s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) + + (threadIdx.x % 32) / 4; + else + s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) + + (threadIdx.x % 32) % 4; + + // Precompute which thread should not read memory in which iterations; this is + // needed if there are more threads than required for a certain tilesize or + // when the batchsize is not a multiple of 16. + bool a_sh_wr_pred[a_sh_wr_iters]; +#pragma unroll + for (int i = 0; i < a_sh_wr_iters; i++) + a_sh_wr_pred[i] = a_sh_wr_delta * i + a_sh_wr < a_sh_stride * prob_m; + bool s_sh_wr_pred = threadIdx.x < s_sh_stride; + + // To ensure that writing and reading A tiles to/from shared memory, the + // latter in fragment format, is fully bank conflict free, we need to use a + // rather fancy XOR-based layout. The key here is that neither reads nor + // writes of the 16-byte `int4` blocks of 8 consecutive threads involve the + // same shared memory banks. Further, it seems (based on NSight-Compute) that + // each warp must also write a consecutive memory segment? + auto transform_a = [&](int i) { + int row = i / a_gl_rd_delta_o; + return a_gl_rd_delta_o * row + (i % a_gl_rd_delta_o) ^ row; + }; + // Since the computation of this remapping is non-trivial and, due to our main + // loop unrolls, all shared memory accesses are static, we simply precompute + // both transformed reads and writes. + int a_sh_wr_trans[a_sh_wr_iters]; +#pragma unroll + for (int i = 0; i < a_sh_wr_iters; i++) + a_sh_wr_trans[i] = transform_a(a_sh_wr_delta * i + a_sh_wr); + int a_sh_rd_trans[b_sh_wr_iters][thread_m_blocks]; +#pragma unroll + for (int i = 0; i < b_sh_wr_iters; i++) { +#pragma unroll + for (int j = 0; j < thread_m_blocks; j++) + a_sh_rd_trans[i][j] = + transform_a(a_sh_rd_delta_o * i + a_sh_rd_delta_i * j + a_sh_rd); + } + + // Since B-accesses have non-constant stride they have to be computed at + // runtime; we break dependencies between subsequent accesses with a tile by + // maintining multiple pointers (we have enough registers), a tiny + // optimization. + const int4 *B_ptr[b_sh_wr_iters]; +#pragma unroll + for (int i = 0; i < b_sh_wr_iters; i++) + B_ptr[i] = B + b_gl_rd_delta_i * i + b_gl_rd; + + extern __shared__ int4 sh[]; + // Shared memory storage for global fetch pipelines. 
+ int4 *sh_a = sh; + int4 *sh_b = sh_a + (stages * a_sh_stage); + int4 *sh_s = sh_b + (stages * b_sh_stage); + // Register storage for double buffer of shared memory reads. + FragA frag_a[2][thread_m_blocks]; + I4 frag_b_quant[2]; + FragC frag_c[thread_m_blocks][4][2]; + FragS frag_s[2][4]; + + // Zero accumulators. + auto zero_accums = [&]() { +#pragma unroll + for (int i = 0; i < thread_m_blocks * 4 * 2 * 4; i++) + reinterpret_cast(frag_c)[i] = 0; + }; + + // Asynchronously fetch the next A, B and s tile from global to the next + // shared memory pipeline location. + auto fetch_to_shared = [&](int pipe, int a_off, bool pred = true) { + if (pred) { + int4 *sh_a_stage = sh_a + a_sh_stage * pipe; +#pragma unroll + for (int i = 0; i < a_sh_wr_iters; i++) { + cp_async4_pred( + &sh_a_stage[a_sh_wr_trans[i]], + &A[a_gl_rd_delta_i * i + a_gl_rd + a_gl_rd_delta_o * a_off], + a_sh_wr_pred[i]); + } + int4 *sh_b_stage = sh_b + b_sh_stage * pipe; +#pragma unroll + for (int i = 0; i < b_sh_wr_iters; i++) { + cp_async4_stream(&sh_b_stage[b_sh_wr_delta * i + b_sh_wr], B_ptr[i]); + B_ptr[i] += b_gl_rd_delta_o; + } + // Only fetch scales if this tile starts a new group + if (group_blocks != -1 && pipe % (group_blocks / thread_k_blocks) == 0) { + int4 *sh_s_stage = sh_s + s_sh_stage * pipe; + if (s_sh_wr_pred) + cp_async4_stream(&sh_s_stage[s_sh_wr], &s[s_gl_rd]); + s_gl_rd += s_gl_rd_delta; + } + } + // Insert a fence even when we are winding down the pipeline to ensure that + // waiting is also correct at this point. + cp_async_fence(); + }; + + // Wait until the next thread tile has been loaded to shared memory. + auto wait_for_stage = [&]() { + // We only have `stages - 2` active fetches since we are double buffering + // and can only issue the next fetch when it is guaranteed that the previous + // shared memory load is fully complete (as it may otherwise be + // overwritten). + cp_async_wait(); + __syncthreads(); + }; + + // Load the next sub-tile from the current location in the shared memory pipe + // into the current register buffer. + auto fetch_to_registers = [&](int k, int pipe) { + // It may seem inefficient that we reload the groups for every sub-tile; + // however, this does not seem to be a significant bottleneck, while some + // theoretically better attempts have lead to bad instruction ordering by + // the compiler and correspondingly a noticeable drop in performance. + if (group_blocks != -1) { + int4 *sh_s_stage = + sh_s + s_sh_stage * ((group_blocks / thread_k_blocks) * + (pipe / (group_blocks / thread_k_blocks))); + reinterpret_cast(&frag_s[k % 2])[0] = sh_s_stage[s_sh_rd]; + } + int4 *sh_a_stage = sh_a + a_sh_stage * pipe; +#pragma unroll + for (int i = 0; i < thread_m_blocks; i++) + ldsm4(frag_a[k % 2][i], &sh_a_stage[a_sh_rd_trans[k % b_sh_wr_iters][i]]); + int4 *sh_b_stage = sh_b + b_sh_stage * pipe; + frag_b_quant[k % 2] = *reinterpret_cast( + &sh_b_stage[b_sh_rd_delta * (k % b_sh_wr_iters) + b_sh_rd]); + }; + + // Execute the actual tensor core matmul of a sub-tile. + auto matmul = [&](int k) { +// We have the m dimension as the inner loop in order to encourage overlapping +// dequantization and matmul operations. +#pragma unroll + for (int j = 0; j < 4; j++) { + int b_quant = frag_b_quant[k % 2][j]; + int b_quant_shift = b_quant >> 8; + FragB frag_b0 = dequant(b_quant); + // If there are no groups, we can just scale the final output once and can + // avoid doing so for each weight. 
+ if (group_blocks != -1) + scale(frag_b0, frag_s[k % 2][j], 0); + FragB frag_b1 = dequant(b_quant_shift); + if (group_blocks != -1) + scale(frag_b1, frag_s[k % 2][j], 1); +#pragma unroll + for (int i = 0; i < thread_m_blocks; i++) { + mma(frag_a[k % 2][i], frag_b0, frag_c[i][j][0]); + mma(frag_a[k % 2][i], frag_b1, frag_c[i][j][1]); + } + } + }; + + // Since we slice across the k dimension of a tile in order to increase the + // number of warps while keeping the n dimension of a tile reasonable, we have + // multiple warps that accumulate their partial sums of the same output + // location; which we have to reduce over in the end. We do in shared memory. + auto thread_block_reduce = [&]() { + constexpr int red_off = threads / b_sh_stride / 2; + if (red_off >= 1) { + int red_idx = threadIdx.x / b_sh_stride; + constexpr int red_sh_stride = b_sh_stride * 4 * 2; + constexpr int red_sh_delta = b_sh_stride; + int red_sh_rd = red_sh_stride * (threadIdx.x / b_sh_stride) + + (threadIdx.x % b_sh_stride); + + // Parallel logarithmic shared memory reduction. We make sure to avoid any + // unnecessary read or write iterations, e.g., for two warps we write only + // once by warp 1 and read only once by warp 0. + +#pragma unroll + for (int m_block = 0; m_block < thread_m_blocks; m_block++) { +#pragma unroll + for (int i = red_off; i > 0; i /= 2) { + if (i <= red_idx && red_idx < 2 * i) { +#pragma unroll + for (int j = 0; j < 4 * 2; j++) { + int red_sh_wr = + red_sh_delta * j + (red_sh_rd - red_sh_stride * i); + if (i < red_off) { + float *c_rd = reinterpret_cast( + &sh[red_sh_delta * j + red_sh_rd]); + float *c_wr = reinterpret_cast(&sh[red_sh_wr]); +#pragma unroll + for (int k = 0; k < 4; k++) + reinterpret_cast(frag_c)[4 * 2 * m_block + j][k] += + c_rd[k] + c_wr[k]; + } + sh[red_sh_wr] = + reinterpret_cast(&frag_c)[4 * 2 * m_block + j]; + } + } + __syncthreads(); + } + if (red_idx == 0) { +#pragma unroll + for (int i = 0; i < 4 * 2; i++) { + float *c_rd = + reinterpret_cast(&sh[red_sh_delta * i + red_sh_rd]); +#pragma unroll + for (int j = 0; j < 4; j++) + reinterpret_cast(frag_c)[4 * 2 * m_block + i][j] += + c_rd[j]; + } + } + __syncthreads(); + } + } + }; + + // Since multiple threadblocks may process parts of the same column slice, we + // finally have to globally reduce over the results. As the striped partitioning + // minimizes the number of such reductions and our outputs are usually rather + // small, we perform this reduction serially in L2 cache. + auto global_reduce = [&](bool first = false, bool last = false) { + // We are very careful here to reduce directly in the output buffer to + // maximize L2 cache utilization in this step. To do this, we write out + // results in FP16 (but still reduce with FP32 compute). + constexpr int active_threads = 32 * thread_n_blocks / 4; + if (threadIdx.x < active_threads) { + int c_gl_stride = prob_n / 8; + int c_gl_wr_delta_o = 8 * c_gl_stride; + int c_gl_wr_delta_i = 4 * (active_threads / 32); + int c_gl_wr = c_gl_stride * ((threadIdx.x % 32) / 4) + + 4 * (threadIdx.x / 32) + threadIdx.x % 4; + c_gl_wr += (2 * thread_n_blocks) * slice_col; + constexpr int c_sh_wr_delta = active_threads; + int c_sh_wr = threadIdx.x; + + int row = (threadIdx.x % 32) / 4; + + if (!first) { +// Interestingly, doing direct global accesses here really seems to mess up the +// compiler and lead to slowdowns, hence we also use async-copies even though +// these fetches are not actually asynchronous. 
+#pragma unroll + for (int i = 0; i < thread_m_blocks * 4; i++) { + cp_async4_pred(&sh[c_sh_wr + c_sh_wr_delta * i], + &C[c_gl_wr + c_gl_wr_delta_o * (i / 2) + + c_gl_wr_delta_i * (i % 2)], + i < (thread_m_blocks - 1) * 4 || + 8 * (i / 2) + row < prob_m); + } + cp_async_fence(); + cp_async_wait<0>(); + } + +#pragma unroll + for (int i = 0; i < thread_m_blocks * 4; i++) { + if (i < (thread_m_blocks - 1) * 4 || 8 * (i / 2) + row < prob_m) { + if (!first) { + int4 c_red = sh[c_sh_wr + i * c_sh_wr_delta]; +#pragma unroll + for (int j = 0; j < 2 * 4; j++) { + reinterpret_cast( + &frag_c)[4 * 2 * 4 * (i / 4) + 4 * j + (i % 4)] += + __half2float(reinterpret_cast<__half *>(&c_red)[j]); + } + } + if (!last) { + int4 c; +#pragma unroll + for (int j = 0; j < 2 * 4; j++) { + reinterpret_cast<__half *>(&c)[j] = + __float2half(reinterpret_cast( + &frag_c)[4 * 2 * 4 * (i / 4) + 4 * j + (i % 4)]); + } + C[c_gl_wr + c_gl_wr_delta_o * (i / 2) + c_gl_wr_delta_i * (i % 2)] = + c; + } + } + } + } + }; + + // Write out the reduce final result in the correct layout. We only actually + // reshuffle matrix fragments in this step, the reduction above is performed + // in fragment layout. + auto write_result = [&]() { + int c_gl_stride = prob_n / 8; + constexpr int c_sh_stride = 2 * thread_n_blocks + 1; + int c_gl_wr_delta = c_gl_stride * (threads / (2 * thread_n_blocks)); + constexpr int c_sh_rd_delta = + c_sh_stride * (threads / (2 * thread_n_blocks)); + + int c_gl_wr = c_gl_stride * (threadIdx.x / (2 * thread_n_blocks)) + + (threadIdx.x % (2 * thread_n_blocks)); + c_gl_wr += (2 * thread_n_blocks) * slice_col; + int c_sh_wr = + (4 * c_sh_stride) * ((threadIdx.x % 32) / 4) + (threadIdx.x % 32) % 4; + c_sh_wr += 32 * (threadIdx.x / 32); + int c_sh_rd = c_sh_stride * (threadIdx.x / (2 * thread_n_blocks)) + + (threadIdx.x % (2 * thread_n_blocks)); + + int c_gl_wr_end = c_gl_stride * prob_m; + + // We first reorder in shared memory to guarantee the most efficient final + // global write patterns + auto write = [&](int idx, float c0, float c1, FragS &s) { + half2 res = __halves2half2(__float2half(c0), __float2half(c1)); + if (group_blocks == + -1) // for per-column quantization we finally apply the scale here + res = __hmul2(res, s[0]); + ((half2 *)sh)[idx] = res; + }; + if (threadIdx.x / 32 < thread_n_blocks / 4) { +#pragma unroll + for (int i = 0; i < thread_m_blocks; i++) { +#pragma unroll + for (int j = 0; j < 4; j++) { + int wr = c_sh_wr + 8 * j; + write(wr + (4 * c_sh_stride) * 0 + 0, frag_c[i][j][0][0], + frag_c[i][j][0][1], frag_s[j / 2][2 * (j % 2) + 0]); + write(wr + (4 * c_sh_stride) * 8 + 0, frag_c[i][j][0][2], + frag_c[i][j][0][3], frag_s[j / 2][2 * (j % 2) + 0]); + write(wr + (4 * c_sh_stride) * 0 + 4, frag_c[i][j][1][0], + frag_c[i][j][1][1], frag_s[j / 2][2 * (j % 2) + 1]); + write(wr + (4 * c_sh_stride) * 8 + 4, frag_c[i][j][1][2], + frag_c[i][j][1][3], frag_s[j / 2][2 * (j % 2) + 1]); + } + c_sh_wr += 16 * (4 * c_sh_stride); + } + } + __syncthreads(); + +#pragma unroll + for (int i = 0; + i < ceildiv(16 * thread_m_blocks, threads / (2 * thread_n_blocks)); + i++) { + if (c_gl_wr < c_gl_wr_end) { + C[c_gl_wr] = sh[c_sh_rd]; + c_gl_wr += c_gl_wr_delta; + c_sh_rd += c_sh_rd_delta; + } + } + }; + + // Start global fetch and register load pipelines. 
+ auto start_pipes = [&]() { +#pragma unroll + for (int i = 0; i < stages - 1; i++) + fetch_to_shared(i, i, i < slice_iters); + zero_accums(); + wait_for_stage(); + fetch_to_registers(0, 0); + a_gl_rd += a_gl_rd_delta_o * (stages - 1); + }; + start_pipes(); + + // Main loop. + while (slice_iters) { +// We unroll over both the global fetch and the register load pipeline to ensure +// all shared memory accesses are static. Note that both pipelines have even +// length meaning that the next iteration will always start at index 0. +#pragma unroll + for (int pipe = 0; pipe < stages;) { +#pragma unroll + for (int k = 0; k < b_sh_wr_iters; k++) { + fetch_to_registers(k + 1, pipe % stages); + if (k == b_sh_wr_iters - 2) { + fetch_to_shared((pipe + stages - 1) % stages, pipe, + slice_iters >= stages); + pipe++; + wait_for_stage(); + } + matmul(k); + } + slice_iters--; + if (slice_iters == 0) + break; + } + a_gl_rd += a_gl_rd_delta_o * stages; + + // Process results and, if necessary, proceed to the next column slice. + // While this pattern may not be the most readable, other ways of writing + // the loop seemed to noticeably worse performance after compilation. + if (slice_iters == 0) { + cp_async_wait<0>(); + bool last = slice_idx == slice_count - 1; + // For per-column scales, we only fetch them here in the final step before + // write-out + if (group_blocks == -1 && last) { + if (s_sh_wr_pred) + cp_async4_stream(&sh_s[s_sh_wr], &s[s_gl_rd]); + cp_async_fence(); + } + thread_block_reduce(); + if (group_blocks == -1 && last) { + cp_async_wait<0>(); + __syncthreads(); + if (threadIdx.x / 32 < thread_n_blocks / 4) { + reinterpret_cast(&frag_s)[0] = sh_s[s_sh_rd + 0]; + reinterpret_cast(&frag_s)[1] = sh_s[s_sh_rd + 4]; + } + } + if (slice_count > 1) { // only globally reduce if there is more than one + // block in a slice + barrier_acquire(&locks[slice_col], slice_idx); + global_reduce(slice_idx == 0, last); + barrier_release(&locks[slice_col], last); + } + if (last) // only the last block in a slice actually writes the result + write_result(); + slice_row = 0; + slice_col_par++; + slice_col++; + init_slice(); + if (slice_iters) { + a_gl_rd = a_gl_stride * (threadIdx.x / a_gl_rd_delta_o) + + (threadIdx.x % a_gl_rd_delta_o); +#pragma unroll + for (int i = 0; i < b_sh_wr_iters; i++) + B_ptr[i] += b_sh_stride - b_gl_rd_delta_o * k_tiles; + if (slice_col == 0) { +#pragma unroll + for (int i = 0; i < b_sh_wr_iters; i++) + B_ptr[i] -= b_gl_stride; + } + s_gl_rd = s_sh_stride * slice_col + threadIdx.x; + start_pipes(); + } + } + } +} + +#else + +template shared + // fetch pipeline + const int group_blocks = -1 // number of consecutive 16x16 blocks with + // a separate quantization scale + > +__global__ void +Marlin(const int4 *__restrict__ A, // fp16 input matrix of shape mxk + const int4 *__restrict__ B, // 4bit quantized weight matrix of shape kxn + int4 *__restrict__ C, // fp16 output buffer of shape mxn + const int4 + *__restrict__ s, // fp16 quantization scales of shape (k/groupsize)xn + int prob_m, // batch dimension m + int prob_n, // output dimension n + int prob_k, // reduction dimension k + int *locks // extra global storage for barrier synchronization +) { + // Marlin is not implemented yet for SM < 8.0 + assert(false); + return; +} + +#endif + +// 8 warps are a good choice since every SM has 4 schedulers and having more +// than 1 warp per schedule allows some more latency hiding. At the same time, +// we want relatively few warps to have many registers per warp and small tiles. 
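The main loop above keeps stages shared-memory buffers in flight: while one buffer is consumed through the double-buffered register fragments, the fetch that will land stages - 1 iterations later has already been issued. The Python sketch below is a loose model of that scheduling only, with plain lists standing in for shared-memory stages and synchronous "fetches" in place of the real asynchronous copies.

STAGES = 4  # mirrors the pipeline depth used by the kernel

def run_pipeline(num_tiles: int):
    stage_buf = [None] * STAGES          # ring buffer of shared-memory stages
    # Prologue: fill stages 0 .. STAGES-2, as start_pipes() does.
    for i in range(min(STAGES - 1, num_tiles)):
        stage_buf[i] = f"tile{i}"
    next_fetch = STAGES - 1
    for t in range(num_tiles):
        # Issue the fetch that lands STAGES-1 iterations in the future, then
        # compute on the stage that is already resident.
        if next_fetch < num_tiles:
            stage_buf[next_fetch % STAGES] = f"tile{next_fetch}"
        next_fetch += 1
        yield f"matmul on {stage_buf[t % STAGES]}"

for step in run_pipeline(6):
    print(step)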
+const int USER_THREADS = + 256; // Note: This is only used with user-provided thread_k/n +const int STAGES = 4; // 4 pipeline stages fit into shared memory +const int SHARED_MEM = + 96 * 1024; // max shared memory on compute capability 8.6 (< 8.0) + +static constexpr int min_thread_n = 64; +static constexpr int min_thread_k = 64; + +static constexpr int tile_size = 16; +static constexpr int max_par = 16; + +static constexpr int pack_factor_4bit = + 8; // We have 8 4-bit vals inside a 32 bit + +#define __CALL_IF(THREAD_M_BLOCKS, THREAD_N_BLOCKS, THREAD_K_BLOCKS, \ + GROUP_BLOCKS, NUM_THREADS) \ + else if (thread_m_blocks == THREAD_M_BLOCKS && \ + thread_n_blocks == THREAD_N_BLOCKS && \ + thread_k_blocks == THREAD_K_BLOCKS && \ + group_blocks == GROUP_BLOCKS && num_threads == NUM_THREADS) { \ + cudaFuncSetAttribute(Marlin, \ + cudaFuncAttributeMaxDynamicSharedMemorySize, \ + SHARED_MEM); \ + Marlin<<>>( \ + A_ptr, B_ptr, C_ptr, s_ptr, prob_m, prob_n, prob_k, locks); \ + } + +typedef struct { + int thread_k; + int thread_n; + int num_threads; +} thread_config_t; + +thread_config_t small_batch_thread_configs[] = { + // Ordered by priority + + // thread_k, thread_n, num_threads + {128, 128, 256}, // Default + {128, 64, 128}, // Reduce N 2X, same K + {64, 256, 256}, // Reduce K 2X, increase N 2X + {64, 128, 128}, // Reduce K 2X, same N +}; + +thread_config_t large_batch_thread_configs[] = { + // Ordered by priority + + // thread_k, thread_n, num_threads + {64, 256, 256}, // Default + {128, 128, 256}, // Reduce N 2X, increase K 2X + {64, 128, 128}, // Reduce N 2X, same K + {128, 64, 128}, // Reduce N 4X, increase K 2X +}; + +bool is_valid_config(thread_config_t const &th_config, int prob_m, int prob_n, + int prob_k) { + // Sanity + if (th_config.thread_k == -1 || th_config.thread_n == -1 || + th_config.num_threads == -1) { + return false; + } + + // Verify K/N are divisible by thread K/N + if (prob_k % th_config.thread_k != 0 || prob_n % th_config.thread_n != 0) { + return false; + } + + // thread_k can be only 128 or 64 (because it must be less than groupsize + // which is 128) + if (th_config.thread_k != 128 && th_config.thread_k != 64) { + return false; + } + + // Verify min for thread K/N + if (th_config.thread_n < min_thread_n || th_config.thread_k < min_thread_k) { + return false; + } + + // num_threads must be at least 128 (= 4 warps) + if (th_config.num_threads < 128) { + return false; + } + + return true; +} + +thread_config_t determine_thread_config(int prob_m, int prob_n, int prob_k) { + + if (prob_m <= 16) { + for (auto th_config : small_batch_thread_configs) { + if (is_valid_config(th_config, prob_m, prob_n, prob_k)) { + return th_config; + } + } + + } else { + for (auto th_config : large_batch_thread_configs) { + if (is_valid_config(th_config, prob_m, prob_n, prob_k)) { + return th_config; + } + } + } + + return thread_config_t{-1, -1, -1}; +} + +#define CALL_IF(N_BLOCKS, K_BLOCKS, NUM_THREADS) \ + __CALL_IF(1, N_BLOCKS, K_BLOCKS, -1, NUM_THREADS) \ + __CALL_IF(1, N_BLOCKS, K_BLOCKS, 8, NUM_THREADS) \ + __CALL_IF(1, N_BLOCKS, K_BLOCKS, -1, NUM_THREADS) \ + __CALL_IF(1, N_BLOCKS, K_BLOCKS, 8, NUM_THREADS) \ + __CALL_IF(2, N_BLOCKS, K_BLOCKS, -1, NUM_THREADS) \ + __CALL_IF(2, N_BLOCKS, K_BLOCKS, 8, NUM_THREADS) \ + __CALL_IF(3, N_BLOCKS, K_BLOCKS, -1, NUM_THREADS) \ + __CALL_IF(3, N_BLOCKS, K_BLOCKS, 8, NUM_THREADS) \ + __CALL_IF(4, N_BLOCKS, K_BLOCKS, -1, NUM_THREADS) \ + __CALL_IF(4, N_BLOCKS, K_BLOCKS, 8, NUM_THREADS) + +void marlin_cuda(const void *A, const void *B, void *C, void 
*s, int prob_m, + int prob_n, int prob_k, void *workspace, int groupsize = -1, + int dev = 0, cudaStream_t stream = 0, int thread_k = -1, + int thread_n = -1, int sms = -1, int max_par = 16) { + int tot_m = prob_m; + int tot_m_blocks = ceildiv(tot_m, 16); + int pad = 16 * tot_m_blocks - tot_m; + + if (sms == -1) + cudaDeviceGetAttribute(&sms, cudaDevAttrMultiProcessorCount, dev); + + // Set thread config + thread_config_t th_config; + if (thread_k != -1 && thread_n != -1) { + // User-defined config + th_config = thread_config_t{thread_k, thread_n, USER_THREADS}; + } else { + // Auto config + th_config = determine_thread_config(prob_m, prob_n, prob_k); + } + + if (!is_valid_config(th_config, prob_m, prob_n, prob_k)) { + throw std::runtime_error( + "Invalid thread config: thread_k = " + str(th_config.thread_k) + + ", thread_n = " + str(th_config.thread_n) + + ", num_threads = " + str(th_config.num_threads) + " for MKN = [" + + str(prob_m) + ", " + str(prob_k) + ", " + str(prob_n) + "]"); + } + + // Uncomment for debug + // std::cout << "Using thread_config: thread_k = " + str(th_config.thread_k) + + // ", thread_n = " + str(th_config.thread_n) + + // ", num_threads = " + str(th_config.num_threads) + " for + // MKN = [" + str(prob_m) + + // ", " + str(prob_k) + ", " + str(prob_n) + "]\n"; + + int num_threads = th_config.num_threads; + thread_k = th_config.thread_k; + thread_n = th_config.thread_n; + + int thread_k_blocks = thread_k / 16; + int thread_n_blocks = thread_n / 16; + int group_blocks = (groupsize == -1) ? -1 : groupsize / 16; + int blocks = sms; + + if (prob_m == 0 || prob_n == 0 || prob_k == 0) { + return; + } + + TORCH_CHECK(prob_n % thread_n == 0, "prob_n = ", prob_n, + " is not divisible by thread_n = ", thread_n); + TORCH_CHECK(prob_k % thread_k == 0, "prob_k = ", prob_k, + " is not divisible by thread_k = ", thread_k); + if (group_blocks != -1) { + TORCH_CHECK(prob_k % group_blocks == 0, "prob_k = ", prob_k, + " is not divisible by group_blocks = ", group_blocks); + } + + const int4 *A_ptr = (const int4 *)A; + const int4 *B_ptr = (const int4 *)B; + int4 *C_ptr = (int4 *)C; + const int4 *s_ptr = (const int4 *)s; + + int *locks = (int *)workspace; + + for (int i = 0; i < tot_m_blocks; i += 4) { + int thread_m_blocks = tot_m_blocks - i; + prob_m = tot_m - 16 * i; + int par = 1; + if (thread_m_blocks > 4) { + // Note that parallel > 1 currently only works for inputs without any + // padding + par = (16 * thread_m_blocks - pad) / 64; + if (par > max_par) + par = max_par; + prob_m = 64 * par; + i += 4 * (par - 1); + thread_m_blocks = 4; + } + + // For compilation speed, we only define the kernel configurations that have + // seemed useful (in terms of performance) in our testing, however many more + // are, in principle, possible. 
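The loop above processes the batch in chunks of at most four 16-row blocks and, for larger batches, folds up to max_par full 64-row problems into a single launch by advancing the A and C pointers accordingly. The same index arithmetic, rendered in Python with stand-in batch sizes, shows how a batch gets chunked:

# Stand-alone rendering of the chunking logic in marlin_cuda (same formulas,
# stand-in batch sizes).
def ceildiv(a: int, b: int) -> int:
    return (a + b - 1) // b

def chunk_batch(tot_m: int, max_par: int = 16):
    tot_m_blocks = ceildiv(tot_m, 16)
    pad = 16 * tot_m_blocks - tot_m
    i = 0
    while i < tot_m_blocks:
        thread_m_blocks = tot_m_blocks - i
        prob_m = tot_m - 16 * i
        par = 1
        if thread_m_blocks > 4:
            # Fold several full 64-row problems into a single launch.
            par = min((16 * thread_m_blocks - pad) // 64, max_par)
            prob_m = 64 * par
            i += 4 * (par - 1)
            thread_m_blocks = 4
        yield prob_m, thread_m_blocks, par
        i += 4

print(list(chunk_batch(192)))  # [(192, 4, 3)]
print(list(chunk_batch(24)))   # [(24, 2, 1)]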
+ if (false) { + } + CALL_IF(8, 8, 256) + CALL_IF(16, 4, 256) + CALL_IF(8, 4, 128) + CALL_IF(4, 8, 128) + else { + throw std::runtime_error("Unsupported shapes: MKN = [" + str(prob_m) + + ", " + str(prob_k) + ", " + str(prob_n) + "]" + + ", groupsize = " + str(groupsize) + + ", thread_m_blocks = " + str(thread_m_blocks) + + ", thread_n_blocks = " + str(thread_n_blocks) + + ", thread_k_blocks = " + str(thread_k_blocks)); + } + + A_ptr += 16 * thread_m_blocks * (prob_k / 8) * par; + C_ptr += 16 * thread_m_blocks * (prob_n / 8) * par; + } +} + +} // namespace marlin + +torch::Tensor marlin_gemm(torch::Tensor &a, torch::Tensor &b_q_weight, + torch::Tensor &b_scales, torch::Tensor &workspace, + int64_t size_m, int64_t size_n, int64_t size_k) { + + // Verify M + TORCH_CHECK(size_m == a.size(0), + "Shape mismatch: a.size(0) = " + str(a.size(0)) + + ", size_m = " + str(size_m)); + + // Verify K + TORCH_CHECK(size_k == a.size(1), + "Shape mismatch: a.size(1) = " + str(a.size(1)) + + ", size_k = " + str(size_k)); + TORCH_CHECK(size_k % marlin::tile_size == 0, + "size_k = " + str(size_k) + + " is not divisible by tile_size = " + str(marlin::tile_size)); + TORCH_CHECK((size_k / marlin::tile_size) == b_q_weight.size(0), + "Shape mismatch: b_q_weight.size(0) = " + + str(b_q_weight.size(0)) + ", size_k = " + str(size_k) + + ", tile_size = " + str(marlin::tile_size)); + + // Verify N + TORCH_CHECK(b_scales.size(1) == size_n, + "b_scales.size(1) = " + str(b_scales.size(1)) + + ", size_n = " + str(size_n)); + TORCH_CHECK(b_q_weight.size(1) % marlin::tile_size == 0, + "b_q_weight.size(1) = " + str(b_q_weight.size(1)) + + " is not divisible by tile_size = " + str(marlin::tile_size)); + + int actual_size_n = + (b_q_weight.size(1) / marlin::tile_size) * marlin::pack_factor_4bit; + TORCH_CHECK(size_n == actual_size_n, + "size_n = " + str(size_n) + + ", actual_size_n = " + str(actual_size_n)); + + // Verify A device and strides + TORCH_CHECK(a.device().is_cuda(), "A is not on GPU"); + TORCH_CHECK(a.is_contiguous(), "A is not contiguous"); + + // Verify B device and strides + TORCH_CHECK(b_q_weight.device().is_cuda(), "b_q_weight is not on GPU"); + TORCH_CHECK(b_q_weight.is_contiguous(), "b_q_weight is not contiguous"); + + // Verify scales device and strides + TORCH_CHECK(b_scales.device().is_cuda(), "b_scales is not on GPU"); + TORCH_CHECK(b_scales.is_contiguous(), "b_scales is not contiguous"); + + // Alloc C matrix + const at::cuda::OptionalCUDAGuard device_guard(device_of(a)); + auto options = torch::TensorOptions().dtype(a.dtype()).device(a.device()); + torch::Tensor c = torch::empty({size_m, size_n}, options); + + // thread_k: `k` size of a thread_tile in `weights` (can usually be left as + // auto -1) + int thread_k = -1; + // thread_n: `n` size of a thread_tile in `weights` (can usually be left as + // auto -1) + int thread_n = -1; + // sms: number of SMs to use for the kernel (can usually be left as auto -1) + int sms = -1; + + // Detect groupsize + if (b_scales.size(0) != 1) { + TORCH_CHECK(size_k % b_scales.size(0) == 0, + "size_k = " + str(size_k) + + ", is not divisible by b_scales.size(0) = " + + str(b_scales.size(0))); + } + int groupsize = b_scales.size(0) == 1 ? 
-1 : size_k / b_scales.size(0); + + // Verify groupsize + TORCH_CHECK(groupsize == -1 || groupsize == 128, + "Unexpected groupsize = " + str(groupsize)); + + // Verify workspace size + TORCH_CHECK( + size_n % marlin::min_thread_n == 0, + "size_n = " + str(size_n) + + ", is not divisible by min_thread_n = " + str(marlin::min_thread_n)); + int min_workspace_size = (size_n / marlin::min_thread_n) * marlin::max_par; + TORCH_CHECK(workspace.numel() >= min_workspace_size, + "workspace.numel = " + str(workspace.numel()) + + " is below min_workspace_size = " + str(min_workspace_size)); + + int dev = a.get_device(); + marlin::marlin_cuda(a.data_ptr(), b_q_weight.data_ptr(), c.data_ptr(), + b_scales.data_ptr(), size_m, size_n, size_k, + workspace.data_ptr(), groupsize, dev, + at::cuda::getCurrentCUDAStream(dev), thread_k, thread_n, + sms, marlin::max_par); + + return c; +} diff --git a/requirements-dev.txt b/requirements-dev.txt index 80d66530f47f0..55e102374fd73 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -15,6 +15,7 @@ types-setuptools pytest pytest-forked pytest-asyncio +pytest-rerunfailures httpx einops # required for MPT openai diff --git a/setup.py b/setup.py index 1f48be948aa84..745b5a9b2d02a 100644 --- a/setup.py +++ b/setup.py @@ -342,6 +342,8 @@ def get_torch_arch_list() -> Set[str]: if _is_cuda(): vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu") + vllm_extension_sources.append( + "csrc/quantization/marlin/marlin_cuda_kernel.cu") vllm_extension_sources.append("csrc/custom_all_reduce.cu") # Add MoE kernels. diff --git a/tests/conftest.py b/tests/conftest.py index 30a3df89d9f12..6eb8159837d51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -199,6 +199,24 @@ def generate( outputs.append((req_sample_output_ids, req_sample_output_strs)) return outputs + def generate_w_logprobs( + self, + prompts: List[str], + sampling_params: SamplingParams, + ) -> List[Tuple[List[int], str]]: + assert sampling_params.logprobs is not None + + req_outputs = self.model.generate(prompts, + sampling_params=sampling_params) + outputs = [] + for req_output in req_outputs: + for sample in req_output.outputs: + output_str = sample.text + output_ids = sample.token_ids + output_logprobs = sample.logprobs + outputs.append((output_ids, output_str, output_logprobs)) + return outputs + def generate_greedy( self, prompts: List[str], @@ -209,6 +227,20 @@ def generate_greedy( return [(output_ids[0], output_str[0]) for output_ids, output_str in outputs] + def generate_greedy_logprobs( + self, + prompts: List[str], + max_tokens: int, + num_logprobs: int, + ) -> List[Tuple[List[int], str]]: + greedy_logprobs_params = SamplingParams(temperature=0.0, + max_tokens=max_tokens, + logprobs=num_logprobs) + outputs = self.generate_w_logprobs(prompts, greedy_logprobs_params) + + return [(output_ids, output_str, output_logprobs) + for output_ids, output_str, output_logprobs in outputs] + def generate_beam_search( self, prompts: List[str], diff --git a/tests/models/test_marlin.py b/tests/models/test_marlin.py new file mode 100644 index 0000000000000..f3cc517364f06 --- /dev/null +++ b/tests/models/test_marlin.py @@ -0,0 +1,97 @@ +"""Compare the outputs of a GPTQ model to a Marlin model. + +Note: GPTQ and Marlin do not have bitwise correctness. +As a result, in this test, we just confirm that the top selected tokens of the +Marlin/GPTQ models are in the top 3 selections of each other. + +Note: Marlin internally uses locks to synchronize the threads. 
This can +result in very slight nondeterminism for Marlin. As a result, we re-run the test +up to 3 times to see if we pass. + +Run `pytest tests/models/test_marlin.py --forked`. +""" + +import pytest +import torch +from dataclasses import dataclass +from vllm.model_executor.layers.quantization import _QUANTIZATION_CONFIG_REGISTRY + +capability = torch.cuda.get_device_capability() +capability = capability[0] * 10 + capability[1] +marlin_not_supported = ( + capability < _QUANTIZATION_CONFIG_REGISTRY["marlin"].get_min_capability()) + + +@dataclass +class ModelPair: + model_marlin: str + model_gptq: str + + +model_pairs = [ + ModelPair(model_marlin="nm-testing/zephyr-beta-7b-marlin-g128", + model_gptq="nm-testing/zephyr-beta-7b-gptq-g128"), + ModelPair(model_marlin="robertgshaw2/zephyr-7b-beta-channelwise-marlin", + model_gptq="robertgshaw2/zephyr-7b-beta-channelwise-gptq"), + ModelPair(model_marlin="robertgshaw2/TinyLlama-1.1B-Chat-v1.0-g128-marlin", + model_gptq="robertgshaw2/TinyLlama-1.1B-Chat-v1.0-g128-gptq") +] + + +@pytest.mark.flaky(reruns=2) +@pytest.mark.skipif(marlin_not_supported, + reason="Marlin is not supported on this GPU type.") +@pytest.mark.parametrize("model_pair", model_pairs) +@pytest.mark.parametrize("dtype", ["half"]) +@pytest.mark.parametrize("max_tokens", [32]) +@pytest.mark.parametrize("num_logprobs", [3]) +def test_models( + vllm_runner, + example_prompts, + model_pair: ModelPair, + dtype: str, + max_tokens: int, + num_logprobs: int, +) -> None: + marlin_model = vllm_runner(model_pair.model_marlin, dtype=dtype) + marlin_outputs = marlin_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) + + # Note: not sure why, but deleting just the model on Ada Lovelace + # does not free the GPU memory. On Ampere, deleting the just model + # frees the memory. + del marlin_model.model.llm_engine.driver_worker + del marlin_model + + gptq_model = vllm_runner(model_pair.model_gptq, dtype=dtype) + gptq_outputs = gptq_model.generate_greedy_logprobs(example_prompts, + max_tokens, + num_logprobs) + + # Note: not sure why, but deleting just the model on Ada Lovelace + # does not free the GPU memory. On Ampere, deleting the just model + # frees the memory. + del gptq_model.model.llm_engine.driver_worker + del gptq_model + + # loop through the prompts + for prompt_idx in range(len(example_prompts)): + gptq_output_ids, gptq_output_str, gptq_logprobs = gptq_outputs[ + prompt_idx] + marlin_output_ids, marlin_output_str, marlin_logprobs = marlin_outputs[ + prompt_idx] + + for idx, (gptq_output_id, marlin_output_id) in enumerate( + zip(gptq_output_ids, marlin_output_ids)): + # If sequence is not an exact match, + if marlin_output_id != gptq_output_id: + # Each predicted token must be in top 5 of the other's + assert gptq_output_id in marlin_logprobs[idx], ( + f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\nMarlin:\t{marlin_output_str!r}" + ) + assert marlin_output_id in gptq_logprobs[idx], ( + f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\nMarlin:\t{marlin_output_str!r}" + ) + + # Break out since sequences will now diverge. 
+ break diff --git a/vllm/config.py b/vllm/config.py index 2f8883fe0733e..b4d48d34a8a72 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -155,15 +155,21 @@ def _verify_tokenizer_mode(self) -> None: self.tokenizer_mode = tokenizer_mode def _verify_quantization(self) -> None: - supported_quantization = ["awq", "gptq", "squeezellm"] - rocm_not_supported_quantization = ["awq"] + supported_quantization = ["awq", "gptq", "squeezellm", "marlin"] + rocm_not_supported_quantization = ["awq", "marlin"] if self.quantization is not None: self.quantization = self.quantization.lower() # Parse quantization method from the HF model config, if available. hf_quant_config = getattr(self.hf_config, "quantization_config", None) if hf_quant_config is not None: + hf_quant_method = str(hf_quant_config["quant_method"]).lower() + # If the GPTQ model is serialized in marlin format, use marlin. + if (hf_quant_method == "gptq" + and "is_marlin_format" in hf_quant_config + and hf_quant_config["is_marlin_format"]): + hf_quant_method = "marlin" if self.quantization is None: self.quantization = hf_quant_method elif self.quantization != hf_quant_method: @@ -183,9 +189,11 @@ def _verify_quantization(self) -> None: raise ValueError( f"{self.quantization} quantization is currently not supported " f"in ROCm.") - logger.warning(f"{self.quantization} quantization is not fully " - "optimized yet. The speed can be slower than " - "non-quantized models.") + if self.quantization != "marlin": + logger.warning( + f"{self.quantization} quantization is not fully " + "optimized yet. The speed can be slower than " + "non-quantized models.") def _verify_cuda_graph(self) -> None: if self.max_context_len_to_capture is None: diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 55d38b763b2b5..b2396a1d6f141 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -17,6 +17,14 @@ logger = init_logger(__name__) +def adjust_marlin_shard(param, shard_size, shard_offset): + marlin_tile_size = getattr(param, "marlin_tile_size", None) + if marlin_tile_size is None: + return shard_size, shard_offset + + return shard_size * marlin_tile_size, shard_offset * marlin_tile_size + + class LinearMethodBase(ABC): """Base class for different (maybe quantized) linear methods.""" @@ -276,6 +284,11 @@ def weight_loader(self, if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor + + # If marlin, we need to adjust the offset and size to account for the tiling. + shard_size, shard_offset = adjust_marlin_shard( + param, shard_size, shard_offset) + loaded_weight_shard = loaded_weight.narrow( output_dim, shard_offset, shard_size) self.weight_loader(param, loaded_weight_shard, shard_id) @@ -293,6 +306,11 @@ def weight_loader(self, if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor + + # If marlin, we need to adjust the offset and size to account for the tiling. + shard_size, shard_offset = adjust_marlin_shard( + param, shard_size, shard_offset) + param_data = param_data.narrow(output_dim, shard_offset, shard_size) start_idx = tp_rank * shard_size @@ -372,6 +390,7 @@ def weight_loader(self, loaded_shard_id: Optional[str] = None): param_data = param.data output_dim = getattr(param, "output_dim", None) + if loaded_shard_id is None: # Loaded weight is already packed. 
if output_dim is None: @@ -393,6 +412,11 @@ def weight_loader(self, if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor + + # If marlin, we need to adjust the offset and size to account for the tiling. + shard_size, shard_offset = adjust_marlin_shard( + param, shard_size, shard_offset) + loaded_weight_shard = loaded_weight.narrow( output_dim, shard_offset, shard_size) self.weight_loader(param, loaded_weight_shard, shard_id) @@ -417,6 +441,11 @@ def weight_loader(self, if packed_dim == output_dim: shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor + + # If marlin, we need to adjust the offset and size to account for the tiling. + shard_size, shard_offset = adjust_marlin_shard( + param, shard_size, shard_offset) + param_data = param_data.narrow(output_dim, shard_offset, shard_size) if loaded_shard_id == "q": diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index b3449eaff0e35..dc54641878c64 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -4,11 +4,13 @@ from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig +from vllm.model_executor.layers.quantization.marlin import MarlinConfig _QUANTIZATION_CONFIG_REGISTRY = { "awq": AWQConfig, "gptq": GPTQConfig, "squeezellm": SqueezeLLMConfig, + "marlin": MarlinConfig, } diff --git a/vllm/model_executor/layers/quantization/marlin.py b/vllm/model_executor/layers/quantization/marlin.py new file mode 100644 index 0000000000000..7566d78a8aba4 --- /dev/null +++ b/vllm/model_executor/layers/quantization/marlin.py @@ -0,0 +1,210 @@ +from typing import Any, Dict, List, Optional + +import torch +from torch.nn.parameter import Parameter + +from vllm._C import ops +from vllm.model_executor.layers.linear import LinearMethodBase, set_weight_attrs +from vllm.model_executor.layers.quantization.base_config import QuantizationConfig + + +class MarlinConfig(QuantizationConfig): + """Config class for Marlin. + + Reference: https://github.com/IST-DASLab/marlin/tree/master + """ + + def __init__( + self, + group_size: int, + ) -> None: + # Group size for the quantization. + self.group_size = group_size + if self.group_size != 128 and self.group_size != -1: + raise ValueError( + "Currently, only group size 128 and -1 (channelwise) is supported for " + f"Marlin, but got group_size of {self.group_size}") + + # 4 Bits packed into 32 bit datatype. + self.pack_factor = 32 // 4 + + # Tile size used by marlin kernels. + self.tile_size = 16 + + # Min out_features dim + self.min_n_threads = 64 + + # Min in_features dim + self.min_k_threads = 128 + + # Max parallel problems to solve at once (improves large batch performance) + self.max_parallel = 16 + + # Permutation length used by the marlin kernels. 
+ self.perm_len = 1024 + + def __repr__(self) -> str: + return f"MarlinConfig(group_size={self.group_size}" + + @classmethod + def get_name(cls) -> str: + return "marlin" + + @classmethod + def get_supported_act_dtypes(cls) -> List[torch.dtype]: + return [torch.half] + + @classmethod + # Need to figure it out + def get_min_capability(cls) -> int: + return 80 + + @classmethod + def get_config_filenames(cls) -> List[str]: + return ["quantize_config.json"] + + @classmethod + def from_config(cls, config: Dict[str, Any]) -> "MarlinConfig": + group_size = cls.get_from_keys(config, ["group_size"]) + return cls(group_size) + + def get_linear_method(self) -> "MarlinLinearMethod": + return MarlinLinearMethod(self) + + def get_scaled_act_names(self) -> List[str]: + return [] + + +class MarlinLinearMethod(LinearMethodBase): + """Linear method for Marlin. + + Args: + quant_config: The Marlin quantization config. + """ + + def __init__(self, quant_config: MarlinConfig): + self.quant_config = quant_config + + def create_weights( + self, + input_size_per_partition: int, + output_size_per_partition: int, + input_size: int, + output_size: int, + params_dtype: torch.dtype, + ) -> Dict[str, Any]: + del output_size # Unused. + + if params_dtype != torch.float16: + raise ValueError( + f"The params dtype must be float16, but got {params_dtype}") + + # Validate output_size_per_partition + if output_size_per_partition % self.quant_config.min_n_threads != 0: + raise ValueError( + f"Weight output_size_per_partition = {output_size_per_partition} is not divisible by min_n_threads = {self.quant_config.min_n_threads}." + ) + if output_size_per_partition % self.quant_config.pack_factor != 0: + raise ValueError( + f"Weight output_size_per_partition = {output_size_per_partition} is not divisible by pack_factor = {self.quant_config.pack_factor}." + ) + + # Validate input_size_per_partition + if input_size_per_partition % self.quant_config.min_k_threads != 0: + raise ValueError( + f"Weight input_size_per_partition = {input_size_per_partition} is not divisible by min_k_threads = {self.quant_config.min_k_threads}." + ) + if self.quant_config.group_size != -1 and input_size_per_partition % self.quant_config.group_size != 0: + raise ValueError( + f"Weight input_size_per_partition = f{input_size_per_partition} is not divisible by group_size = {self.quant_config.group_size}." + ) + + # Check that we have at least 4 tiles horizontally in the shard + num_tiles_per_perm = self.quant_config.perm_len // ( + self.quant_config.tile_size**2) + if output_size_per_partition % num_tiles_per_perm != 0: + raise ValueError( + "Each permutation group must reside on the same gpu") + + # Quantized 4Bit weights packed into Int32. 
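Given those divisibility checks, the shapes of the parameters created next follow directly from tile_size = 16 and pack_factor = 8. For a hypothetical 4096 x 4096 shard with group_size 128 (numbers chosen only for illustration), the arithmetic works out as:

# Hypothetical shard sizes, used only to illustrate the shape arithmetic of
# the Marlin qweight / scales parameters.
input_size_per_partition = 4096    # k
output_size_per_partition = 4096   # n
group_size = 128
tile_size = 16     # MarlinConfig.tile_size
pack_factor = 8    # eight 4-bit values per int32

qweight_shape = (
    input_size_per_partition // tile_size,                 # 256 tile rows
    output_size_per_partition * tile_size // pack_factor,  # 8192 packed columns
)
input_groups = (1 if group_size == -1
                else input_size_per_partition // group_size)
scales_shape = (input_groups, output_size_per_partition)

print(qweight_shape, scales_shape)  # (256, 8192) (32, 4096)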
+ qweight = Parameter( + torch.empty( + input_size_per_partition // self.quant_config.tile_size, + output_size_per_partition * self.quant_config.tile_size // + self.quant_config.pack_factor, + device="cuda", + dtype=torch.int32, + ), + requires_grad=False, + ) + set_weight_attrs( + qweight, + { + "input_dim": 0, + "output_dim": 1, + "packed_dim": 1, + "pack_factor": self.quant_config.pack_factor, + "marlin_tile_size": self.quant_config.tile_size, + }, + ) + + # Determine if channelwise or not + input_groups = 1 if self.quant_config.group_size == -1 else input_size_per_partition // self.quant_config.group_size + + scales = Parameter( + torch.empty( + input_groups, + output_size_per_partition, + device="cuda", + dtype=params_dtype, + ), + requires_grad=False, + ) + set_weight_attrs( + scales, + { + "input_dim": None if input_groups == 1 else 0, + "output_dim": 1, + }, + ) + + # Allocate workspace (Used for internal locking mechanism) + max_workspace_size = ( + output_size_per_partition // + self.quant_config.min_n_threads) * self.quant_config.max_parallel + workspace = Parameter(torch.zeros(max_workspace_size, + device="cuda", + dtype=torch.int), + requires_grad=False) + + return { + "B": qweight, + "s": scales, + "workspace": workspace, + } + + def apply_weights( + self, + weights: Dict[str, Any], + x: torch.Tensor, + bias: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + qweight = weights["B"] + scales = weights["s"] + workspace = weights["workspace"] + + x_2d = x.view(-1, x.shape[-1]) + + size_m = x_2d.shape[0] + size_k = x_2d.shape[1] + size_n = scales.shape[1] + + output_2d = ops.marlin_gemm(x_2d, qweight, scales, workspace, size_m, + size_n, size_k) + + output = output_2d.view(x.shape[:-1] + (output_2d.shape[1], )) + + if bias is not None: + output.add_(bias) # In-place add + + return output From 82091b864af105dbe373353655dc9d8c0a6ba66f Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 1 Mar 2024 12:58:06 -0800 Subject: [PATCH 041/196] Bump up to v0.3.3 (#3129) --- vllm/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/__init__.py b/vllm/__init__.py index 7ff92d8cc681d..f1e30f5eb6e6e 100644 --- a/vllm/__init__.py +++ b/vllm/__init__.py @@ -8,7 +8,7 @@ from vllm.outputs import CompletionOutput, RequestOutput from vllm.sampling_params import SamplingParams -__version__ = "0.3.2" +__version__ = "0.3.3" __all__ = [ "LLM", From 29e70e3e88698feca9509cf07fcf06b12163f1c3 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Sat, 2 Mar 2024 07:28:41 +0800 Subject: [PATCH 042/196] allow user chose log level by --log-level instead of fixed 'info'. 
(#3109) Co-authored-by: zixiao Co-authored-by: Simon Mo --- vllm/config.py | 2 +- vllm/engine/metrics.py | 1 + vllm/entrypoints/openai/api_server.py | 8 +++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index b4d48d34a8a72..e260e6a0cb1d6 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -317,7 +317,7 @@ def __init__( self.num_cpu_blocks = None def metrics_info(self): - # convert cache_config to dict(key: str, value:str) for prometheus metrics info + # convert cache_config to dict(key: str, value: str) for prometheus metrics info return {key: str(value) for key, value in self.__dict__.items()} def _verify_args(self) -> None: diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 54b09c38f58a5..d31542159e4a4 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -23,6 +23,7 @@ def __init__(self, labelnames: List[str]): if hasattr(collector, "_name") and "vllm" in collector._name: REGISTRY.unregister(collector) + # Config Information self.info_cache_config = Info( name='vllm:cache_config', documentation='information of cache_config') diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index b2f040114a078..3777e0f3a0601 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -62,6 +62,12 @@ def parse_args(): description="vLLM OpenAI-Compatible RESTful API server.") parser.add_argument("--host", type=str, default=None, help="host name") parser.add_argument("--port", type=int, default=8000, help="port number") + parser.add_argument( + "--uvicorn-log-level", + type=str, + default="info", + choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'], + help="log level for uvicorn") parser.add_argument("--allow-credentials", action="store_true", help="allow credentials") @@ -245,7 +251,7 @@ async def authentication(request: Request, call_next): uvicorn.run(app, host=args.host, port=args.port, - log_level="info", + log_level=args.uvicorn_log_level, timeout_keep_alive=TIMEOUT_KEEP_ALIVE, ssl_keyfile=args.ssl_keyfile, ssl_certfile=args.ssl_certfile) From baee28c46c242b72f90d6b1211ab9d7872ab05d3 Mon Sep 17 00:00:00 2001 From: cloudhan Date: Sat, 2 Mar 2024 14:34:48 +0800 Subject: [PATCH 043/196] Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104) --- vllm/config.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index e260e6a0cb1d6..ff8536c1aca55 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -330,15 +330,14 @@ def _verify_cache_dtype(self) -> None: if self.cache_dtype == "auto": pass elif self.cache_dtype == "fp8_e5m2": + if is_hip(): + raise NotImplementedError( + "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.") nvcc_cuda_version = get_nvcc_cuda_version() if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"): raise ValueError( "FP8 is not supported when cuda version is lower than 11.8." ) - device_name = torch.cuda.get_device_name() - if "AMD" in device_name: - raise NotImplementedError( - "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.") logger.info( "Using fp8_e5m2 data type to store kv cache. It reduces " "the GPU memory footprint and boosts the performance. 
" From ce4f5a29fb3e35041842518fefe999847b8326b9 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Sat, 2 Mar 2024 03:50:01 -0500 Subject: [PATCH 044/196] Add Automatic Prefix Caching (#2762) Co-authored-by: ElizaWszola Co-authored-by: Michael Goin --- benchmarks/benchmark_throughput.py | 30 ++- docs/source/models/engine_args.rst | 4 + examples/offline_inference_with_prefix.py | 11 +- tests/prefix_caching/test_prefix_caching.py | 103 ++++--- tests/test_cache_block_hashing.py | 76 ++++++ vllm/block.py | 14 +- vllm/config.py | 2 + vllm/core/block_manager.py | 285 +++++++++++++++----- vllm/core/evictor.py | 161 +++++++++++ vllm/core/scheduler.py | 15 +- vllm/engine/arg_utils.py | 9 +- vllm/engine/async_llm_engine.py | 14 +- vllm/engine/llm_engine.py | 26 +- vllm/entrypoints/api_server.py | 6 +- vllm/entrypoints/llm.py | 14 +- vllm/prefix.py | 87 ------ vllm/sequence.py | 23 +- vllm/worker/model_runner.py | 30 ++- 18 files changed, 618 insertions(+), 292 deletions(-) create mode 100644 tests/test_cache_block_hashing.py create mode 100644 vllm/core/evictor.py delete mode 100644 vllm/prefix.py diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 1ad502526c97c..51c1a6540a451 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -73,21 +73,21 @@ def run_vllm( enforce_eager: bool, kv_cache_dtype: str, device: str, + enable_prefix_caching: bool, ) -> float: from vllm import LLM, SamplingParams - llm = LLM( - model=model, - tokenizer=tokenizer, - quantization=quantization, - tensor_parallel_size=tensor_parallel_size, - seed=seed, - trust_remote_code=trust_remote_code, - dtype=dtype, - max_model_len=max_model_len, - enforce_eager=enforce_eager, - kv_cache_dtype=kv_cache_dtype, - device=device, - ) + llm = LLM(model=model, + tokenizer=tokenizer, + quantization=quantization, + tensor_parallel_size=tensor_parallel_size, + seed=seed, + trust_remote_code=trust_remote_code, + dtype=dtype, + max_model_len=max_model_len, + enforce_eager=enforce_eager, + kv_cache_dtype=kv_cache_dtype, + device=device, + enable_prefix_caching=enable_prefix_caching) # Add the requests to the engine. for prompt, _, output_len in requests: @@ -211,7 +211,8 @@ def main(args: argparse.Namespace): args.seed, args.n, args.use_beam_search, args.trust_remote_code, args.dtype, args.max_model_len, args.enforce_eager, - args.kv_cache_dtype, args.device) + args.kv_cache_dtype, args.device, + args.enable_prefix_caching) elif args.backend == "hf": assert args.tensor_parallel_size == 1 elapsed_time = run_hf(requests, args.model, tokenizer, args.n, @@ -302,6 +303,7 @@ def main(args: argparse.Namespace): default="cuda", choices=["cuda"], help='device type for vLLM execution, supporting CUDA only currently.') + parser.add_argument("--enable_prefix_caching", action='store_true') args = parser.parse_args() if args.tokenizer is None: args.tokenizer = args.model diff --git a/docs/source/models/engine_args.rst b/docs/source/models/engine_args.rst index d89b795149501..9f5f672ae4f34 100644 --- a/docs/source/models/engine_args.rst +++ b/docs/source/models/engine_args.rst @@ -81,6 +81,10 @@ Below, you can find an explanation of every engine argument for vLLM: Token block size for contiguous chunks of tokens. +.. option:: --enable-prefix-caching + + Enables automatic prefix caching + .. option:: --seed Random seed for operations. 
diff --git a/examples/offline_inference_with_prefix.py b/examples/offline_inference_with_prefix.py index 8ccfb1ceea731..1aa718b88907c 100644 --- a/examples/offline_inference_with_prefix.py +++ b/examples/offline_inference_with_prefix.py @@ -37,20 +37,13 @@ print("-" * 80) -# -1 since the last token can change when concatenating prompts. -prefix_pos = len(llm.llm_engine.tokenizer.encode(prefix)) - 1 - # The llm.generate call will batch all prompts and send the batch at once if resources allow. # The prefix will only be cached after the first batch is processed, so we need to call generate once # to calculate the prefix and cache it. -outputs = llm.generate(generating_prompts[0], - sampling_params, - prefix_pos=[prefix_pos]) +outputs = llm.generate(generating_prompts[0], sampling_params) # Subsequent batches can leverage the cached prefix -outputs = llm.generate(generating_prompts, - sampling_params, - prefix_pos=[prefix_pos] * len(generating_prompts)) +outputs = llm.generate(generating_prompts, sampling_params) # Print the outputs. You should see the same outputs as before for output in outputs: diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py index 1e301bedfc21e..7ef8dde7bb8f6 100644 --- a/tests/prefix_caching/test_prefix_caching.py +++ b/tests/prefix_caching/test_prefix_caching.py @@ -4,38 +4,73 @@ """ import pytest -from vllm import LLM, SamplingParams - -prefix = ( - "You are an expert school principal, skilled in effectively managing " - "faculty and staff. Draft 10-15 questions for a potential first grade " - "Head Teacher for my K-12, all-girls', independent school that emphasizes " - "community, joyful discovery, and life-long learning. The candidate is " - "coming in for a first-round panel interview for a 8th grade Math " - "teaching role. They have 5 years of previous teaching experience " - "as an assistant teacher at a co-ed, public school with experience " - "in middle school math teaching. Based on these information, fulfill " - "the following paragraph: ") - - -@pytest.mark.parametrize("model", ["facebook/opt-125m"]) -@pytest.mark.parametrize("max_tokens", [16]) -def test_prefix_caching( - example_prompts, - model: str, - max_tokens: int, +from vllm.core.block_manager import BlockAllocator +from vllm.utils import Device + + +@pytest.mark.parametrize("block_size", [16]) +@pytest.mark.parametrize("num_blocks", [16]) +def test_block_allocator( + block_size: int, + num_blocks: int, ): - llm = LLM(model=model) - # -1 since the last token can change when concatenating prompts. 
- prefix_pos = len(llm.llm_engine.tokenizer.encode(prefix)) - 1 - prompts = [prefix + prompt for prompt in example_prompts] - sampling_params = SamplingParams(temperature=0.0, max_tokens=max_tokens) - outputs_without_prefix = llm.generate(prompts, sampling_params) - outputs_with_prefix = llm.generate(prompts, - sampling_params, - prefix_pos=[prefix_pos] * len(prompts)) - for output_without_prefix, output_with_prefix in zip( - outputs_without_prefix, outputs_with_prefix): - assert (output_without_prefix.outputs[0].token_ids == - output_with_prefix.outputs[0].token_ids) - assert len(llm.llm_engine.scheduler.prefix_pool.prefixes) == 1 + block_hash = 1 + block_allocator = BlockAllocator(Device.CPU, + block_size, + num_blocks, + enable_caching=True) + + # Allocate two PysicalTokenBlocks with the same hash and check that they are the same PhysicalTokenBlock + first_block = block_allocator.allocate(block_hash, 0) + second_block = block_allocator.allocate(block_hash, 0) + assert (first_block == second_block) + assert (second_block.ref_count == 2) + + # Free the first_block and confirm that the ref_count is correctly decremented on the second block + block_allocator.free(first_block) + assert (second_block.ref_count == 1) + + # Free the second block + block_allocator.free(second_block) + + # Reallocate the first block and confirm that, even after the block had its ref_count go to 0, we still get the same block back + first_block = block_allocator.allocate(block_hash, 0) + assert (first_block == second_block) + assert (first_block.block_hash == block_hash) + + +@pytest.mark.parametrize("num_blocks", [16]) +def test_eviction(num_blocks: int, ): + block_size = 16 + block_allocator = BlockAllocator(Device.CPU, + block_size, + num_blocks, + enable_caching=True) + blocks = [] + + for i in range(num_blocks): + # use i as the block_hash + blocks.append(block_allocator.allocate(i, 0)) + + #Free all blocks + for block in blocks: + block_allocator.free(block) + + # Allocate a new block and confirm that it's the first block freed. I.E The Least Recently Used block + new_block_hash = block_size + new_block = block_allocator.allocate(new_block_hash, 0) + assert (new_block == blocks[0]) + assert (new_block.block_hash == new_block_hash) + + # Reallocate the second in blocks to remove it from the free list + realloc_block_hash = 1 + realloc_block = block_allocator.allocate(realloc_block_hash, 0) + assert (realloc_block == blocks[realloc_block_hash]) + assert (realloc_block.block_hash == realloc_block_hash) + + # Allocate a new block and confirm that it's not the realloc_block, since the realloc_block shouldn't be in the free list + new_block_hash = block_size + 1 + new_block = block_allocator.allocate(new_block_hash, 0) + assert (realloc_block != new_block) + assert (new_block.block_hash == new_block_hash) + assert (new_block.block_number == 2) diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py new file mode 100644 index 0000000000000..7c4ade7f8c8ed --- /dev/null +++ b/tests/test_cache_block_hashing.py @@ -0,0 +1,76 @@ +"""Test hashing of cache blocks. + +Run `pytest tests/test_cache_block_hashing.py`. +""" +import pytest + +from vllm.transformers_utils.tokenizer import TokenizerGroup +from vllm.sequence import Sequence + +# Make two prefixes with different first blocks. +prefix_start = [("You are an expert"), ("You are a")] +prefix_common = ( + " school principal, skilled in effectively managing " + "faculty and staff. 
Draft 10-15 questions for a potential first grade " + "Head Teacher for my K-12, all-girls', independent school that emphasizes " + "community, joyful discovery, and life-long learning. The candidate is " + "coming in for a first-round panel interview for a 8th grade Math " + "teaching role. They have 5 years of previous teaching experience " + "as an assistant teacher at a co-ed, public school with experience " + "in middle school math teaching. Based on this, fulfill " + "the following: ") +prefixes = [start + prefix_common for start in prefix_start] + +# Sample prompts. +sample_prompts = [ + "Hello, my name is", "The president of the United States is", + "The capital of France is", "The future of AI is" +] + + +# Helper function. +def flatten_2d(li): + return [lss for ls in li for lss in ls] + + +@pytest.mark.parametrize("model", ["facebook/opt-125m"]) +@pytest.mark.parametrize("block_size", [16]) +@pytest.mark.parametrize("max_num_seqs", [256]) +def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int): + + tokenizer = TokenizerGroup( + tokenizer_id="facebook/opt-125m", + enable_lora=False, + max_num_seqs=max_num_seqs, + max_input_length=None, + ) + + hashes = [] + + for prefix in prefixes: + hashes.append([]) + prompts = [prefix + prompt for prompt in sample_prompts] + seq_id = 0 + for prompt in prompts: + hashes[-1].append([]) + prompt_token_ids = tokenizer.encode(prompt) + seq = Sequence(seq_id, prompt, prompt_token_ids, block_size) + + num_blocks = len(prompt_token_ids) // block_size + for idx in range(num_blocks): + hashes[-1][-1].append(seq.hash_of_block(idx)) + + seq_id += 1 + + # Check that hashes made with two prefixes with different first blocks are + # different everywhere. + for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])): + assert (hash0 != hash1) + + # Check that hashes of different prompts made with the same prefix are the + # same until the hashes that contain the prompt. + for hash_pref in hashes: + same_hashes = [tuple(h[:-1]) for h in hash_pref] + different_hashes = [h[-1] for h in hash_pref] + assert (len(set(same_hashes)) == 1) + assert (len(set(different_hashes)) == len(different_hashes)) diff --git a/vllm/block.py b/vllm/block.py index 5fe39ed47b2ff..2cc6b947f2255 100644 --- a/vllm/block.py +++ b/vllm/block.py @@ -5,6 +5,8 @@ _BLANK_TOKEN_ID = -1 +DEFAULT_LAST_ACCESSED_TIME = -1 + class LogicalTokenBlock: """A block that stores a contiguous chunk of tokens from left to right. @@ -55,17 +57,27 @@ def __init__( device: Device, block_number: int, block_size: int, + block_hash: int, + num_hashed_tokens: int, ) -> None: self.device = device self.block_number = block_number self.block_size = block_size + self.block_hash = block_hash + self.num_hashed_tokens = num_hashed_tokens self.ref_count = 0 + self.last_accessed = DEFAULT_LAST_ACCESSED_TIME + + self.computed = False def __repr__(self) -> str: return (f'PhysicalTokenBlock(device={self.device}, ' f'block_number={self.block_number}, ' - f'ref_count={self.ref_count})') + f'num_hashed_tokens={self.num_hashed_tokens}, ' + f'ref_count={self.ref_count}, ' + f'last_accessed={self.last_accessed}, ' + f'computed={self.computed})') # Mapping: logical block number -> physical block. 
diff --git a/vllm/config.py b/vllm/config.py index ff8536c1aca55..876a439cd1280 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -303,12 +303,14 @@ def __init__( swap_space: int, cache_dtype: str, sliding_window: Optional[int] = None, + enable_prefix_caching: bool = False, ) -> None: self.block_size = block_size self.gpu_memory_utilization = gpu_memory_utilization self.swap_space_bytes = swap_space * _GB self.cache_dtype = cache_dtype self.sliding_window = sliding_window + self.enable_prefix_caching = enable_prefix_caching self._verify_args() self._verify_cache_dtype() diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 3946096d4296a..08d519ab767a9 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -1,10 +1,13 @@ """A block manager that manages token blocks.""" import enum +from itertools import count +from os.path import commonprefix from typing import Dict, List, Optional, Set, Tuple from vllm.block import BlockTable, PhysicalTokenBlock from vllm.sequence import Sequence, SequenceGroup, SequenceStatus from vllm.utils import Device +from vllm.core.evictor import Evictor, EvictionPolicy, make_evictor class BlockAllocator: @@ -15,29 +18,68 @@ class BlockAllocator: the reference count becomes zero, the block is added back to the free list. """ - def __init__( - self, - device: Device, - block_size: int, - num_blocks: int, - ) -> None: + def __init__(self, + device: Device, + block_size: int, + num_blocks: int, + eviction_policy: EvictionPolicy = EvictionPolicy.LRU, + enable_caching: bool = False) -> None: self.device = device self.block_size = block_size self.num_blocks = num_blocks + self.enable_caching = enable_caching + + self.current_num_blocks = 0 + self.cached_blocks: Dict[int, PhysicalTokenBlock] = {} + + # Switch over to FIFO eviction when caching is disabled + if not self.enable_caching: + eviction_policy = EvictionPolicy.FIFO + self.evictor: Evictor = make_evictor(eviction_policy) + + self.default_hash_ctr = count() + + def allocate_block(self, block_hash: int, + num_hashed_tokens: int) -> PhysicalTokenBlock: + if self.current_num_blocks == self.num_blocks: + block = self.evictor.evict() + block.block_hash = block_hash + block.num_hashed_tokens = num_hashed_tokens + return block + block = PhysicalTokenBlock(device=self.device, + block_number=self.current_num_blocks, + block_size=self.block_size, + block_hash=block_hash, + num_hashed_tokens=num_hashed_tokens) + self.current_num_blocks += 1 + return block - # Initialize the free blocks. - self.free_blocks: BlockTable = [] - for i in range(num_blocks): - block = PhysicalTokenBlock(device=device, - block_number=i, - block_size=block_size) - self.free_blocks.append(block) - - def allocate(self) -> PhysicalTokenBlock: - if not self.free_blocks: - raise ValueError("Out of memory! 
No free blocks are available.") - block = self.free_blocks.pop() - block.ref_count = 1 + def allocate(self, + block_hash: Optional[int] = None, + num_hashed_tokens: int = 0) -> PhysicalTokenBlock: + # If caching is disabled, just allocate a new block and return it + if not self.enable_caching: + block = self.allocate_block(next(self.default_hash_ctr), + num_hashed_tokens) + block.ref_count += 1 + return block + + if block_hash is None: + block_hash = next(self.default_hash_ctr) + if block_hash in self.evictor: + assert block_hash not in self.cached_blocks + block = self.evictor.remove(block_hash) + assert block.ref_count == 0 + self.cached_blocks[block_hash] = block + block.ref_count += 1 + assert block.block_hash == block_hash + return block + if block_hash not in self.cached_blocks: + self.cached_blocks[block_hash] = self.allocate_block( + block_hash, num_hashed_tokens) + block = self.cached_blocks[block_hash] + assert block.block_hash == block_hash + block.ref_count += 1 return block def free(self, block: PhysicalTokenBlock) -> None: @@ -45,10 +87,27 @@ def free(self, block: PhysicalTokenBlock) -> None: raise ValueError(f"Double free! {block} is already freed.") block.ref_count -= 1 if block.ref_count == 0: - self.free_blocks.append(block) + assert block.block_hash not in self.evictor + self.evictor.add(block) + + # If caching is enabled, remove the block from the cached_blocks + if self.enable_caching: + del self.cached_blocks[block.block_hash] def get_num_free_blocks(self) -> int: - return len(self.free_blocks) + return self.num_blocks - self.current_num_blocks + self.evictor.num_blocks + + def contains_block(self, block_hash: int) -> bool: + return block_hash in self.cached_blocks or block_hash in self.evictor + + def update_hash(self, block_hash: int, block: PhysicalTokenBlock): + # If caching is enabled, update the hash of block and the cached_blocks dictionary. + if self.enable_caching: + assert not self.contains_block(block_hash) + old_hash = block.block_hash + block.block_hash = block_hash + del self.cached_blocks[old_hash] + self.cached_blocks[block_hash] = block class AllocStatus(enum.Enum): @@ -75,6 +134,7 @@ def __init__( num_cpu_blocks: int, watermark: float = 0.01, sliding_window: Optional[int] = None, + enable_caching: bool = False, ) -> None: self.block_size = block_size self.num_total_gpu_blocks = num_gpu_blocks @@ -89,11 +149,17 @@ def __init__( self.watermark = watermark assert watermark >= 0.0 + self.enable_caching = enable_caching + self.watermark_blocks = int(watermark * num_gpu_blocks) - self.gpu_allocator = BlockAllocator(Device.GPU, block_size, - num_gpu_blocks) - self.cpu_allocator = BlockAllocator(Device.CPU, block_size, - num_cpu_blocks) + self.gpu_allocator = BlockAllocator(Device.GPU, + block_size, + num_gpu_blocks, + enable_caching=enable_caching) + self.cpu_allocator = BlockAllocator(Device.CPU, + block_size, + num_cpu_blocks, + enable_caching=enable_caching) # Mapping: seq_id -> BlockTable. 
self.block_tables: Dict[int, BlockTable] = {} @@ -103,9 +169,6 @@ def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus: seq = seq_group.get_seqs(status=SequenceStatus.WAITING)[0] num_required_blocks = len(seq.logical_token_blocks) - if seq_group.prefix is not None and seq_group.prefix.allocated: - num_required_blocks -= seq_group.prefix.get_num_blocks() - if self.block_sliding_window is not None: num_required_blocks = min(num_required_blocks, self.block_sliding_window) @@ -129,36 +192,16 @@ def allocate(self, seq_group: SequenceGroup) -> None: num_prompt_blocks = len(seq.logical_token_blocks) block_table: BlockTable = [] - prefix_block_table: BlockTable = [] - num_prefix_blocks = 0 - - prefix = seq_group.prefix - if prefix is not None and prefix.allocated: - # Prefix has already been allocated. Use the existing block table. - num_prompt_blocks -= prefix.get_num_blocks() - for block in prefix.block_table: - block.ref_count += seq_group.num_seqs() - block_table.append(block) - for logical_idx in range(num_prompt_blocks): if (self.block_sliding_window is not None and logical_idx >= self.block_sliding_window): block = block_table[logical_idx % self.block_sliding_window] else: - block = self.gpu_allocator.allocate() - # Set the reference counts of the token blocks. - block.ref_count = seq_group.num_seqs() + block = self.gpu_allocator.allocate( + seq.hash_of_block(logical_idx), + seq.num_hashed_tokens_of_block(logical_idx)) block_table.append(block) - if prefix is not None and not prefix.allocated: - # Allocate blocks for the prefix, we will compute the prefix's - # KV cache in this run. - num_prefix_blocks = prefix.get_num_blocks() - prefix_block_table = block_table[:num_prefix_blocks] - for block in prefix_block_table: - block.ref_count += 1 - prefix.set_block_table(prefix_block_table) - # Assign the block table for each sequence. 
for seq in seq_group.get_seqs(status=SequenceStatus.WAITING): self.block_tables[seq.seq_id] = block_table.copy() @@ -170,12 +213,72 @@ def can_append_slot(self, seq_group: SequenceGroup) -> bool: num_seqs = seq_group.num_seqs(status=SequenceStatus.RUNNING) return num_seqs <= num_free_gpu_blocks - def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]: + def _promote_last_block( + self, + seq: Sequence, + last_block: PhysicalTokenBlock, + ) -> PhysicalTokenBlock: + # Compute a new hash for the block so that it can be shared by other Sequences + new_hash = seq.hash_of_block(len(seq.logical_token_blocks) - 1) + + # if new_hash is already in the cached table, then free last_block and return the cached version + if self.gpu_allocator.contains_block(new_hash): + self.gpu_allocator.free(last_block) + return self.gpu_allocator.allocate(new_hash) + else: + self.gpu_allocator.update_hash(new_hash, last_block) + return last_block + + def _is_last_block_full( + self, + seq: Sequence, + ) -> bool: + token_ids_len = len(seq.data.get_token_ids()) + return token_ids_len > 0 and token_ids_len % seq.block_size == 0 + + def _is_last_block( + self, + seq: Sequence, + index: int, + ) -> bool: + return index == len(seq.logical_token_blocks) - 1 + + def _maybe_promote_last_block( + self, + seq: Sequence, + last_block: PhysicalTokenBlock, + ) -> PhysicalTokenBlock: + if self._is_last_block_full(seq): + return self._promote_last_block(seq, last_block) + else: + return last_block + + def _allocate_last_physical_block( + self, + seq: Sequence, + ) -> PhysicalTokenBlock: + block_hash: Optional[int] = None + if (self._is_last_block_full(seq)): + block_hash = seq.hash_of_block(len(seq.logical_token_blocks) - 1) + num_hashed_tokens = seq.num_hashed_tokens_of_block( + len(seq.logical_token_blocks) - 1) + new_block = self.gpu_allocator.allocate(block_hash, num_hashed_tokens) + if block_hash is None: + assert new_block.ref_count == 1 + return new_block + + def append_slot( + self, + seq: Sequence, + ) -> Optional[Tuple[int, int]]: """Allocate a physical slot for a new token.""" logical_blocks = seq.logical_token_blocks block_table = self.block_tables[seq.seq_id] - + # If we need to allocate a new physical block if len(block_table) < len(logical_blocks): + # Currently this code only supports adding one physical block + assert len(block_table) == len(logical_blocks) - 1 + if (self.block_sliding_window and len(block_table) >= self.block_sliding_window): # reuse a block @@ -184,8 +287,8 @@ def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]: else: # The sequence has a new logical block. # Allocate a new physical block. - block = self.gpu_allocator.allocate() - block_table.append(block) + new_block = self._allocate_last_physical_block(seq) + block_table.append(new_block) return None # We want to append the token to the last physical block. @@ -193,11 +296,15 @@ def append_slot(self, seq: Sequence) -> Optional[Tuple[int, int]]: assert last_block.device == Device.GPU if last_block.ref_count == 1: # Not shared with other sequences. Appendable. + # If the last block is now complete, promote it to a full block so that it can be shared + new_block = self._maybe_promote_last_block(seq, last_block) + block_table[-1] = new_block return None else: # The last block is shared with other sequences. # Copy on Write: Allocate a new block and copy the tokens. 
- new_block = self.gpu_allocator.allocate() + new_block = self._allocate_last_physical_block(seq) + block_table[-1] = new_block self.gpu_allocator.free(last_block) return last_block.block_number, new_block.block_number @@ -233,25 +340,18 @@ def can_swap_in(self, seq_group: SequenceGroup) -> bool: def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]: # CPU block -> GPU block. - if seq_group.prefix is not None: - # make sure to swap in the prefix first - assert seq_group.prefix.allocated and seq_group.prefix.computed - mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {} for seq in seq_group.get_seqs(status=SequenceStatus.SWAPPED): new_block_table: BlockTable = [] block_table = self.block_tables[seq.seq_id] - if seq_group.prefix is not None: - for block in seq_group.prefix.block_table: - new_block_table.append(block) - block.ref_count += 1 for cpu_block in block_table: if cpu_block in mapping: gpu_block = mapping[cpu_block] gpu_block.ref_count += 1 else: - gpu_block = self.gpu_allocator.allocate() + gpu_block = self.gpu_allocator.allocate( + cpu_block.block_hash, cpu_block.num_hashed_tokens) mapping[cpu_block] = gpu_block new_block_table.append(gpu_block) # Free the CPU block swapped in to GPU. @@ -276,17 +376,12 @@ def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]: block_table = self.block_tables[seq.seq_id] for gpu_block in block_table: - if (seq_group.prefix is not None - and gpu_block in seq_group.prefix.block_table): - # NOTE: We do not swap out the prefix blocks for now. - self.gpu_allocator.free(gpu_block) - continue - if gpu_block in mapping: cpu_block = mapping[gpu_block] cpu_block.ref_count += 1 else: - cpu_block = self.cpu_allocator.allocate() + cpu_block = self.cpu_allocator.allocate( + gpu_block.block_hash, gpu_block.num_hashed_tokens) mapping[gpu_block] = cpu_block new_block_table.append(cpu_block) # Free the GPU block swapped out to CPU. @@ -328,3 +423,49 @@ def get_num_free_gpu_blocks(self) -> int: def get_num_free_cpu_blocks(self) -> int: return self.cpu_allocator.get_num_free_blocks() + + def access_all_blocks_in_seq( + self, + seq: Sequence, + access_time: float, + ) -> None: + block_table = self.block_tables[seq.seq_id] + for block in block_table: + block.last_accessed = access_time + + def compute_last_full_block_in_seq(self, seq: Sequence): + if seq.seq_id not in self.block_tables: + return + max_full_block = seq.get_len() // seq.block_size - 1 + block_table = self.block_tables[seq.seq_id] + if max_full_block == -1: + return + block_table[max_full_block].computed = True + + def get_all_block_ids_till_computed(self, seq: Sequence) -> List[int]: + if seq.seq_id not in self.block_tables: + return [] + block_table = self.block_tables[seq.seq_id] + for block_idx in reversed(range(len(block_table))): + if block_table[block_idx].computed: + return [b.block_number for b in block_table[:block_idx + 1]] + return [] + + # Can return non-empty result only with prefix caching enabled. + def get_common_computed_block_ids(self, + seq_group: SequenceGroup) -> List[int]: + if not self.enable_caching: + return [] + + ids_list = [ + self.get_all_block_ids_till_computed(seq) + for seq in iter(seq_group.seqs_dict.values()) + ] + return commonprefix([ids for ids in ids_list if ids != []]) + + # We only mark the last full block because with prefix caching, + # all blocks until the marked one are guaranteed to be computed. 
+ def mark_blocks_as_computed(self, seq_group: SequenceGroup): + if self.enable_caching: + for seq in seq_group.seqs_dict.values(): + self.compute_last_full_block_in_seq(seq) diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py new file mode 100644 index 0000000000000..b538ea574b604 --- /dev/null +++ b/vllm/core/evictor.py @@ -0,0 +1,161 @@ +import enum +from typing import Dict, List, Optional +from abc import ABC, abstractmethod, abstractproperty + +from vllm.block import PhysicalTokenBlock + + +class EvictionPolicy(enum.Enum): + """Enum for eviction policy used by make_evictor to instantiate the correct + Evictor subclass. + """ + LRU = enum.auto() + FIFO = enum.auto() + + +class Evictor(ABC): + """The Evictor subclasses should be used by the BlockAllocator class to + handle eviction of freed PhysicalTokenBlocks. + """ + + @abstractmethod + def __init__(self): + pass + + @abstractmethod + def __contains__(self, block_hash: int) -> bool: + pass + + @abstractmethod + def evict(self) -> PhysicalTokenBlock: + """Runs the eviction algorithm and returns the evicted block""" + pass + + @abstractmethod + def add(self, block: PhysicalTokenBlock): + """Adds block to the evictor, making it a candidate for eviction""" + pass + + @abstractmethod + def remove(self, block_hash: int) -> PhysicalTokenBlock: + """Simply removes the block with the hash value block_hash from the + evictor. Caller is responsible for making sure that block_hash is contained + in the evictor before calling remove. Should be used to "bring back" blocks + that have been freed but not evicted yet. + """ + pass + + @abstractproperty + def num_blocks(self) -> int: + pass + + +class LRUEvictor(Evictor): + """Evicts in a least-recently-used order using the last_accessed timestamp + that's recorded in the PhysicalTokenBlock. If there are multiple blocks with + the same last_accessed time, then the one with the largest num_hashed_tokens + will be evicted. If two blocks each have the lowest last_accessed time and + highest num_hashed_tokens value, then one will be chose arbitrarily + """ + + def __init__(self): + self.free_table: Dict[int, PhysicalTokenBlock] = {} + + def __contains__(self, block_hash: int) -> bool: + return block_hash in self.free_table + + # TODO: The performance of this evict function can be optimized further. 
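+    # Illustrative walk-through of the policy implemented below (values are
+    # made up): given freed blocks A(last_accessed=10, num_hashed_tokens=16),
+    # B(last_accessed=10, num_hashed_tokens=32) and C(last_accessed=12,
+    # num_hashed_tokens=64), A and B tie on the oldest timestamp, so B is
+    # evicted first because it covers more hashed tokens; A goes next, and
+    # C, the most recently accessed block, is evicted last.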
+ def evict(self) -> PhysicalTokenBlock: + free_blocks: List[PhysicalTokenBlock] = list(self.free_table.values()) + if len(free_blocks) == 0: + raise ValueError("No usable cache memory left") + + # Find lowest timestamp + lowest_timestamp = free_blocks[0].last_accessed + for block in free_blocks: + if block.last_accessed < lowest_timestamp: + lowest_timestamp = block.last_accessed + + # Find all blocks with the lowest timestamp + least_recent: List[PhysicalTokenBlock] = [] + for block in free_blocks: + if block.last_accessed == lowest_timestamp: + least_recent.append(block) + + # Find highest prefix count per block + highest_num_hashed_tokens = 0 + for block in least_recent: + if block.num_hashed_tokens > highest_num_hashed_tokens: + highest_num_hashed_tokens = block.num_hashed_tokens + + evicted_block: Optional[PhysicalTokenBlock] = None + + # Find the first block with the lowest timestamp + for block in least_recent: + if block.num_hashed_tokens == highest_num_hashed_tokens: + evicted_block = block + break + + assert evicted_block is not None + + del self.free_table[evicted_block.block_hash] + + evicted_block.computed = False + return evicted_block + + def add(self, block: PhysicalTokenBlock): + self.free_table[block.block_hash] = block + + def remove(self, block_hash: int) -> PhysicalTokenBlock: + if block_hash not in self.free_table: + raise ValueError( + "Attempting to remove block that's not in the evictor") + block: PhysicalTokenBlock = self.free_table[block_hash] + del self.free_table[block_hash] + return block + + @property + def num_blocks(self) -> int: + return len(self.free_table) + + +class RandomEvictor(Evictor): + """Evicts in a first-in-first-out order""" + + def __init__(self): + self.free_table: Dict[int, PhysicalTokenBlock] = {} + + def __contains__(self, block_hash: int) -> bool: + return block_hash in self.free_table + + def evict(self) -> PhysicalTokenBlock: + if len(self.free_table) == 0: + raise ValueError("No usable cache memory left") + evicted_block = next(iter(self.free_table.values())) + evicted_block.computed = False + del self.free_table[evicted_block.block_hash] + return evicted_block + + def add(self, block: PhysicalTokenBlock): + self.free_table[block.block_hash] = block + + def remove(self, block_hash: int) -> PhysicalTokenBlock: + if block_hash not in self.free_table: + raise ValueError( + "Attempting to remove block that's not in the evictor") + block: PhysicalTokenBlock = self.free_table[block_hash] + del self.free_table[block_hash] + return block + + @property + def num_blocks(self) -> int: + return len(self.free_table) + + +def make_evictor(eviction_policy: EvictionPolicy) -> Evictor: + if eviction_policy == EvictionPolicy.LRU: + return LRUEvictor() + elif eviction_policy == EvictionPolicy.FIFO: + return RandomEvictor() + else: + raise ValueError(f"Unknown cache eviction policy: {eviction_policy}") diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 5e7cc3091d775..1ae58f525b0fb 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -10,7 +10,6 @@ from vllm.logger import init_logger from vllm.sequence import (Sequence, SequenceData, SequenceGroup, SequenceGroupMetadata, SequenceStatus) -from vllm.prefix import PrefixPool logger = init_logger(__name__) @@ -95,10 +94,8 @@ def __init__( block_size=self.cache_config.block_size, num_gpu_blocks=self.cache_config.num_gpu_blocks, num_cpu_blocks=self.cache_config.num_cpu_blocks, - sliding_window=self.cache_config.sliding_window) - - # Create the prefix pool to cache the prefixes. 
- self.prefix_pool = PrefixPool(self.cache_config.block_size) + sliding_window=self.cache_config.sliding_window, + enable_caching=self.cache_config.enable_prefix_caching) # Sequence groups in the WAITING state. self.waiting: Deque[SequenceGroup] = deque() @@ -374,10 +371,12 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]: seq_data: Dict[int, SequenceData] = {} block_tables: Dict[int, List[int]] = {} + for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): seq_id = seq.seq_id seq_data[seq_id] = seq.data block_tables[seq_id] = self.block_manager.get_block_table(seq) + self.block_manager.access_all_blocks_in_seq(seq, now) seq_group_metadata = SequenceGroupMetadata( request_id=seq_group.request_id, @@ -386,7 +385,8 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]: sampling_params=seq_group.sampling_params, block_tables=block_tables, lora_request=seq_group.lora_request, - prefix=seq_group.prefix, + computed_block_nums=self.block_manager. + get_common_computed_block_ids(seq_group), state=seq_group.state, ) seq_group_metadata_list.append(seq_group_metadata) @@ -496,3 +496,6 @@ def _swap_out( blocks_to_swap_out.update(mapping) for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): seq.status = SequenceStatus.SWAPPED + + def mark_blocks_as_computed(self, seq_group: SequenceGroup): + self.block_manager.mark_blocks_as_computed(seq_group) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c01e7311fb89a..0349c3a6636c7 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -25,6 +25,7 @@ class EngineArgs: tensor_parallel_size: int = 1 max_parallel_loading_workers: Optional[int] = None block_size: int = 16 + enable_prefix_caching: bool = False swap_space: int = 4 # GiB gpu_memory_utilization: float = 0.90 max_num_batched_tokens: Optional[int] = None @@ -173,6 +174,11 @@ def add_cli_args( default=EngineArgs.block_size, choices=[8, 16, 32, 128], help='token block size') + + parser.add_argument('--enable-prefix-caching', + action='store_true', + help='Enables automatic prefix caching') + parser.add_argument('--seed', type=int, default=EngineArgs.seed, @@ -293,7 +299,8 @@ def create_engine_configs( cache_config = CacheConfig(self.block_size, self.gpu_memory_utilization, self.swap_space, self.kv_cache_dtype, - model_config.get_sliding_window()) + model_config.get_sliding_window(), + self.enable_prefix_caching) parallel_config = ParallelConfig(self.pipeline_parallel_size, self.tensor_parallel_size, self.worker_use_ray, diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index daa6419cdad3b..9e52d20ca4980 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -225,7 +225,6 @@ async def add_request_async( prompt_token_ids: Optional[List[int]] = None, arrival_time: Optional[float] = None, lora_request: Optional[LoRARequest] = None, - prefix_pos: Optional[int] = None, ) -> None: if lora_request is not None and not self.lora_config: raise ValueError(f"Got lora_request {lora_request} but LoRA is " @@ -245,7 +244,6 @@ async def add_request_async( sampling_params=sampling_params, arrival_time=arrival_time, lora_request=lora_request, - prefix_pos=prefix_pos, ) async def _run_workers_async( @@ -422,7 +420,6 @@ async def add_request( prompt_token_ids: Optional[List[int]] = None, arrival_time: Optional[float] = None, lora_request: Optional[LoRARequest] = None, - prefix_pos: Optional[int] = None, ) -> AsyncStream: if self.log_requests: shortened_prompt = 
prompt @@ -435,7 +432,6 @@ async def add_request( max_log_len] logger.info(f"Received request {request_id}: " f"prompt: {shortened_prompt!r}, " - f"prefix_pos: {prefix_pos}," f"sampling_params: {sampling_params}, " f"prompt_token_ids: {shortened_token_ids}, " f"lora_request: {lora_request}.") @@ -472,8 +468,7 @@ async def add_request( sampling_params=sampling_params, prompt_token_ids=prompt_token_ids, arrival_time=arrival_time, - lora_request=lora_request, - prefix_pos=prefix_pos) + lora_request=lora_request) return stream @@ -484,7 +479,6 @@ async def generate( request_id: str, prompt_token_ids: Optional[List[int]] = None, lora_request: Optional[LoRARequest] = None, - prefix_pos: Optional[int] = None, ) -> AsyncIterator[RequestOutput]: """Generate outputs for a request. @@ -500,11 +494,6 @@ async def generate( prompt_token_ids: The token IDs of the prompt. If None, we use the tokenizer to convert the prompts to token IDs. lora_request: LoRA request to use for generation, if any. - prefix_pos: If not None, we use the given position as the prefix - position for each prompt. We will cache the prefix's KV - cache and reuse it for the next request with the same prefix. - This is an experimental feature, and may be replaced with - automatic prefix caching in the future. Yields: The output `RequestOutput` objects from the LLMEngine for the @@ -565,7 +554,6 @@ async def generate( prompt_token_ids=prompt_token_ids, arrival_time=arrival_time, lora_request=lora_request, - prefix_pos=prefix_pos, ) async for request_output in stream: diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index df4858a696530..e84fda5640e4d 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -415,7 +415,6 @@ def add_request( prompt_token_ids: Optional[List[int]] = None, arrival_time: Optional[float] = None, lora_request: Optional[LoRARequest] = None, - prefix_pos: Optional[int] = None, ) -> None: """Add a request to the engine's request pool. @@ -432,11 +431,6 @@ def add_request( use the tokenizer to convert the prompts to token IDs. arrival_time: The arrival time of the request. If None, we use the current monotonic time. - prefix_pos: If not None, we use the given position as the prefix - position for each prompt. We will cache the prefix's KV - cache and reuse it for the next request with the same prefix. - This is an experimental feature, and may be replaced with - automatic prefix caching in the future. Details: - Set arrival_time to the current time if it is None. @@ -479,18 +473,13 @@ def add_request( seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, lora_request) - # Check whether the input specifies prefix - prefix = self.scheduler.prefix_pool.add_or_get_prefix( - prompt_token_ids[:prefix_pos], lora_request.lora_int_id - if lora_request else 0) if prefix_pos is not None else None - # Defensive copy of SamplingParams, which are used by the sampler, # this doesn't deep-copy LogitsProcessor objects sampling_params = sampling_params.clone() # Create the sequence group. seq_group = SequenceGroup(request_id, [seq], sampling_params, - arrival_time, lora_request, prefix) + arrival_time, lora_request) # Add the sequence group to the scheduler. self.scheduler.add_seq_group(seq_group) @@ -752,6 +741,13 @@ def _process_model_outputs( now = time.time() # Update the scheduled sequence groups with the model outputs. 
scheduled_seq_groups = scheduler_outputs.scheduled_seq_groups + + # If prefix caching is enabled, mark all blocks in the sequence groups + # as completed so that future requests don't attempt to recompute them + if self.cache_config.enable_prefix_caching: + for seq_group in scheduled_seq_groups: + self.scheduler.mark_blocks_as_computed(seq_group) + for seq_group, outputs in zip(scheduled_seq_groups, output): self._process_sequence_group_outputs(seq_group, outputs) @@ -768,12 +764,6 @@ def _process_model_outputs( request_output = RequestOutput.from_seq_group(seq_group) request_outputs.append(request_output) - # Update prefix state, now all the uncomputed prefixes are computed. - for seq_group in scheduled_seq_groups: - if (seq_group.prefix is not None and seq_group.prefix.allocated - and not seq_group.prefix.computed): - seq_group.prefix.computed = True - # Log stats. if self.log_stats: self.stat_logger.log(self._get_stats(scheduler_outputs)) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index e7af2c6db5e4c..1eb4ab8b06b64 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -39,15 +39,11 @@ async def generate(request: Request) -> Response: """ request_dict = await request.json() prompt = request_dict.pop("prompt") - prefix_pos = request_dict.pop("prefix_pos", None) stream = request_dict.pop("stream", False) sampling_params = SamplingParams(**request_dict) request_id = random_uuid() - results_generator = engine.generate(prompt, - sampling_params, - request_id, - prefix_pos=prefix_pos) + results_generator = engine.generate(prompt, sampling_params, request_id) # Streaming case async def stream_results() -> AsyncGenerator[bytes, None]: diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index fc82018d18eb6..62f1d172377f6 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -124,7 +124,6 @@ def generate( prompts: Optional[Union[str, List[str]]] = None, sampling_params: Optional[SamplingParams] = None, prompt_token_ids: Optional[List[List[int]]] = None, - prefix_pos: Optional[Union[int, List[int]]] = None, use_tqdm: bool = True, lora_request: Optional[LoRARequest] = None, ) -> List[RequestOutput]: @@ -140,11 +139,6 @@ def generate( None, we use the default sampling parameters. prompt_token_ids: A list of token IDs for the prompts. If None, we use the tokenizer to convert the prompts to token IDs. - prefix_pos: If not None, we use the given position as the prefix - position for each prompt. We will cache the prefix's KV - cache and reuse it for the next request with the same prefix. - This is an experimental feature, and may be replaced with - automatic prefix caching in the future. use_tqdm: Whether to use tqdm to display the progress bar. lora_request: LoRA request to use for generation, if any. 
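With prefix_pos removed from the public entrypoints, prefix reuse is driven entirely by the enable_prefix_caching engine flag introduced earlier in this patch; matching leading blocks are detected from the token ids and shared automatically. A minimal usage sketch (the model name and prompts are illustrative placeholders, not taken from this patch):

    from vllm import LLM, SamplingParams

    # enable_prefix_caching is forwarded through EngineArgs to CacheConfig;
    # no per-request prefix_pos argument is needed any more.
    llm = LLM(model="facebook/opt-125m", enable_prefix_caching=True)

    shared_prefix = ("You are a helpful assistant that answers questions "
                     "about the vLLM project as concisely as possible.\n")
    prompts = [shared_prefix + q
               for q in ("What is vLLM?", "What is paged attention?")]

    # Once the shared prefix spans at least one full block, those blocks are
    # hashed, cached, and reused for the second prompt instead of recomputed.
    outputs = llm.generate(prompts,
                           SamplingParams(temperature=0.0, max_tokens=32))
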
@@ -171,14 +165,12 @@ def generate( prompt_token_ids) for i in range(num_requests): prompt = prompts[i] if prompts is not None else None - prefix_pos_i = prefix_pos[i] if prefix_pos is not None else None token_ids = None if prompt_token_ids is None else prompt_token_ids[ i] self._add_request(prompt, sampling_params, token_ids, - lora_request=lora_request, - prefix_pos=prefix_pos_i) + lora_request=lora_request) return self._run_engine(use_tqdm) def _add_request( @@ -187,15 +179,13 @@ def _add_request( sampling_params: SamplingParams, prompt_token_ids: Optional[List[int]], lora_request: Optional[LoRARequest] = None, - prefix_pos: Optional[int] = None, ) -> None: request_id = str(next(self.request_counter)) self.llm_engine.add_request(request_id, prompt, sampling_params, prompt_token_ids, - lora_request=lora_request, - prefix_pos=prefix_pos) + lora_request=lora_request) def _run_engine(self, use_tqdm: bool) -> List[RequestOutput]: # Initialize tqdm. diff --git a/vllm/prefix.py b/vllm/prefix.py deleted file mode 100644 index 5b6e8e4b92be6..0000000000000 --- a/vllm/prefix.py +++ /dev/null @@ -1,87 +0,0 @@ -from typing import Dict, List, Sequence, Tuple, Optional - -from vllm.block import BlockTable - - -class Prefix: - """Data and states associated with a prefix of prompt tokens for multiple - sequence groups. - - NOTE: This feature is experimental and may be replaced with automatic - prefix caching in the future. - - Args: - token_ids: The token ids of the prefix. - block_size: The block size of the executed model. - """ - - def __init__( - self, - token_ids: Sequence[int], - block_size: int, - ) -> None: - self.token_ids = tuple(token_ids) - self.block_size = block_size - self.length = len(token_ids) - self.hash = hash(token_ids) - assert self.length % block_size == 0 - self.block_table: Optional[BlockTable] = None - self.computed = False - - @property - def allocated(self) -> bool: - return self.block_table is not None - - def get_num_blocks(self) -> int: - return self.length // self.block_size - - def get_block_numbers(self) -> List[int]: - return [block.block_number for block in self.block_table] - - def get_length(self) -> int: - return self.length - - def __hash__(self) -> int: - return self.hash - - def set_block_table(self, block_table: BlockTable) -> None: - self.block_table = block_table.copy() - - -class PrefixPool: - """Manages all the prompt prefixes. - - NOTE: This feature is experimental and may be replaced with automatic - prefix caching in the future. - - Args: - block_size: The block size of the executed model. - - Attributes: - prefixes: A list of all the prefixes. - block_size: The block size of the executed model. - """ - - def __init__( - self, - block_size: int, - ) -> None: - # TODO(zhuohan): Add a capacity limit to the prefix pool. - self.prefixes: Dict[int, Prefix] = {} - self.block_size = block_size - - def _truncate_token_ids(self, token_ids: Sequence[int]) -> Tuple[int]: - new_length = len(token_ids) // self.block_size * self.block_size - return tuple(token_ids[:new_length]) - - def add_or_get_prefix(self, token_ids: Sequence[int], - lora_int_id: int) -> Optional[Prefix]: - token_ids = self._truncate_token_ids(token_ids) - if len(token_ids) == 0: - # Prefix is empty. 
- return None - prefix = Prefix(token_ids, self.block_size) - prefix_hash = hash((prefix, lora_int_id)) - if prefix_hash not in self.prefixes: - self.prefixes[prefix_hash] = prefix - return self.prefixes[prefix_hash] diff --git a/vllm/sequence.py b/vllm/sequence.py index 040e9756e15c6..122960035e505 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -5,7 +5,6 @@ from typing import Dict, List, Optional, Union from vllm.block import LogicalTokenBlock -from vllm.prefix import Prefix from vllm.sampling_params import SamplingParams from vllm.lora.request import LoRARequest @@ -161,6 +160,16 @@ def __init__( def lora_int_id(self) -> int: return self.lora_request.lora_int_id if self.lora_request else 0 + # TODO The current hashing function is O(L^2). We should optimize this in + # the future. + def hash_of_block(self, logical_idx: int) -> int: + # Compute the number of tokens in the sequence + num_tokens = self.num_hashed_tokens_of_block(logical_idx) + return hash(tuple(self.data.get_token_ids()[0:num_tokens])) + + def num_hashed_tokens_of_block(self, logical_idx: int): + return logical_idx * self.block_size + self.block_size + def _append_logical_block(self) -> None: block = LogicalTokenBlock( block_number=len(self.logical_token_blocks), @@ -265,7 +274,6 @@ class SequenceGroup: sampling_params: The sampling parameters used to generate the outputs. arrival_time: The arrival time of the request. lora_request: LoRA request. - prefix: The prefix of the prompt of the sequence group. """ def __init__( @@ -275,7 +283,6 @@ def __init__( sampling_params: SamplingParams, arrival_time: float, lora_request: Optional[LoRARequest] = None, - prefix: Optional[Prefix] = None, ) -> None: self.request_id = request_id self.seqs_dict = {seq.seq_id: seq for seq in seqs} @@ -286,7 +293,6 @@ def __init__( first_token_time=None, time_in_queue=None) self.lora_request = lora_request - self.prefix: Optional[Prefix] = prefix self.prompt_logprobs: Optional[PromptLogprobs] = None self.state = SequenceGroupState() @@ -302,6 +308,10 @@ def prompt_token_ids(self) -> List[int]: # We use the prompt of an arbitrary sequence. return next(iter(self.seqs_dict.values())).data.prompt_token_ids + @property + def block_size(self) -> int: + return next(iter(self.seqs_dict.values())).block_size + @property def lora_int_id(self) -> int: return self.lora_request.lora_int_id if self.lora_request else 0 @@ -408,7 +418,6 @@ class SequenceGroupMetadata: numbers) state: Internal state tied to this sequence group. lora_request: LoRA request. - prefix: The prefix of the prompt of the sequence group. 
""" def __init__( @@ -419,7 +428,7 @@ def __init__( sampling_params: SamplingParams, block_tables: Dict[int, List[int]], lora_request: Optional[LoRARequest] = None, - prefix: Optional[Prefix] = None, + computed_block_nums: Optional[List[int]] = None, state: Optional[SequenceGroupState] = None, ) -> None: self.request_id = request_id @@ -428,7 +437,7 @@ def __init__( self.sampling_params = sampling_params self.block_tables = block_tables self.lora_request = lora_request - self.prefix = prefix + self.computed_block_nums = computed_block_nums self.state = SequenceGroupState() if state is None else state @property diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index efe570778fb43..aff8ebc903623 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -145,33 +145,37 @@ def _prepare_prompt( prompt_tokens = seq_data.get_token_ids() prompt_len = len(prompt_tokens) prompt_lens.append(prompt_len) - prefix_len = 0 - prefix = seq_group_metadata.prefix - if prefix is not None and prefix.computed: - prefix_len = prefix.get_length() - prompt_tokens = prompt_tokens[prefix_len:] - prefix_block_tables.append(prefix.get_block_numbers()) + computed_len = 0 + + # NOTE: This only works for oooooooxxx style attention. + computed_block_nums = seq_group_metadata.computed_block_nums + if computed_block_nums is not None and len( + computed_block_nums) > 0 and self.sliding_window is None: + # Prefix is not supported with sliding_window + computed_len = len(computed_block_nums) * self.block_size + prompt_tokens = prompt_tokens[computed_len:] + prefix_block_tables.append(computed_block_nums) else: prefix_block_tables.append([]) # actual prompt lens - context_lens.append(prefix_len) - subquery_lens.append(prompt_len - prefix_len) + context_lens.append(computed_len) + subquery_lens.append(prompt_len - computed_len) input_tokens.append(prompt_tokens) # NOTE(woosuk): Here we assume that the first token in the prompt # is always the first token in the sequence. input_positions.append( - list(range(prefix_len, prefix_len + len(prompt_tokens)))) + list(range(computed_len, computed_len + len(prompt_tokens)))) lora_id = seq_group_metadata.lora_int_id if lora_id > 0: lora_requests.add(seq_group_metadata.lora_request) - lora_index_mapping.append([lora_id] * (prompt_len - prefix_len)) + lora_index_mapping.append([lora_id] * (prompt_len - computed_len)) lora_prompt_mapping.extend( [lora_id] * - (prompt_len - prefix_len + (prompt_len - computed_len if seq_group_metadata.sampling_params.prompt_logprobs else 1)) if seq_group_metadata.block_tables is None: @@ -190,11 +194,11 @@ def _prepare_prompt( # mapping will be [-1, -1, 2, 3, 4, 5, 6, 7, 0, 1]. 
start_idx = 0 if self.sliding_window is not None: - assert prefix_len == 0, ( + assert computed_len == 0, ( "Prefix caching is currently not supported with " "sliding window attention") start_idx = max(0, prompt_len - self.sliding_window) - for i in range(prefix_len, prompt_len): + for i in range(computed_len, prompt_len): if i < start_idx: slot_mapping[-1].append(_PAD_SLOT_ID) continue From d65fac2738f0287a41955b45df76a2d5a919bff6 Mon Sep 17 00:00:00 2001 From: Jason Cox Date: Sun, 3 Mar 2024 00:00:29 -0500 Subject: [PATCH 045/196] Add vLLM version info to logs and openai API server (#3161) --- vllm/engine/llm_engine.py | 3 ++- vllm/entrypoints/openai/api_server.py | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index e84fda5640e4d..c9bd89a1b18f4 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -7,6 +7,7 @@ from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union) +import vllm from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, SchedulerConfig, LoRAConfig) @@ -85,7 +86,7 @@ def __init__( log_stats: bool, ) -> None: logger.info( - "Initializing an LLM engine with config: " + f"Initializing an LLM engine (v{vllm.__version__}) with config: " f"model={model_config.model!r}, " f"tokenizer={model_config.tokenizer!r}, " f"tokenizer_mode={model_config.tokenizer_mode}, " diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 3777e0f3a0601..993a834e5a720 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -15,6 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, StreamingResponse, Response +import vllm from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest, ErrorResponse @@ -168,6 +169,12 @@ async def show_available_models(): return JSONResponse(content=models.model_dump()) +@app.get("/version") +async def show_version(): + ver = {"version": vllm.__version__} + return JSONResponse(content=ver) + + @app.post("/v1/chat/completions") async def create_chat_completion(request: ChatCompletionRequest, raw_request: Request): @@ -231,6 +238,7 @@ async def authentication(request: Request, call_next): f"Invalid middleware {middleware}. Must be a function or a class." 
) + logger.info(f"vLLM API server version {vllm.__version__}") logger.info(f"args: {args}") if args.served_model_name is not None: From 996d095c541e1cd67f0a7ec2579bc3bb0a435494 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Sun, 3 Mar 2024 14:37:18 -0800 Subject: [PATCH 046/196] [FIX] Fix styles in automatic prefix caching & add a automatic prefix caching benchmark (#3158) --- benchmarks/benchmark_prefix_caching.py | 59 ++++++++++++++++++++++++++ benchmarks/benchmark_throughput.py | 5 ++- vllm/core/block_manager.py | 15 ++----- vllm/sequence.py | 8 +--- 4 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 benchmarks/benchmark_prefix_caching.py diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py new file mode 100644 index 0000000000000..c43bd9c3bed3e --- /dev/null +++ b/benchmarks/benchmark_prefix_caching.py @@ -0,0 +1,59 @@ +import argparse +import time + +from vllm import LLM +from vllm import SamplingParams + +PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. 
Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" + + +def test_prefix(llm=None, sampling_params=None, prompts=None, prefix_len=None): + start_time = time.time() + # whether use Prefix + if prefix_len != None: + # start inference + llm.generate(prompts, + sampling_params=sampling_params, + prefix_pos=prefix_len) + else: + llm.generate(prompts, sampling_params=sampling_params) + + end_time = time.time() + print(f"cost time {end_time - start_time}") + + +def main(args): + llm = LLM(model="baichuan-inc/Baichuan2-13B-Chat", + tokenizer_mode='auto', + trust_remote_code=True, + enforce_eager=True, + enable_prefix_caching=args.enable_prefix_caching) + + num_prompts = 100 + prompts = [PROMPT] * num_prompts + sampling_params = SamplingParams(temperature=0, max_tokens=100) + + print("------warm up------") + test_prefix( + llm=llm, + prompts=prompts[:1], + sampling_params=sampling_params, + ) + + print("------start generating------") + test_prefix( + llm=llm, + prompts=prompts, + sampling_params=sampling_params, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Benchmark the performance with or without automatic ' + 'prefix caching.') + parser.add_argument('--enable-prefix-caching', + action='store_true', + help='enable prefix caching') + args = parser.parse_args() + main(args) diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 51c1a6540a451..1f0bfe06a67cb 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -303,7 +303,10 @@ def main(args: argparse.Namespace): default="cuda", choices=["cuda"], help='device type for vLLM execution, supporting CUDA only currently.') - parser.add_argument("--enable_prefix_caching", action='store_true') + parser.add_argument( + "--enable-prefix-caching", + action='store_true', + help="enable automatic prefix caching for vLLM backend.") args = parser.parse_args() if args.tokenizer is None: args.tokenizer = args.model diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 08d519ab767a9..daf83827a7e52 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -236,13 +236,6 @@ def _is_last_block_full( token_ids_len = len(seq.data.get_token_ids()) return token_ids_len > 0 and token_ids_len % seq.block_size == 0 - def _is_last_block( - self, - seq: Sequence, - index: int, - ) -> bool: - return index == len(seq.logical_token_blocks) - 1 - def _maybe_promote_last_block( self, seq: Sequence, @@ -436,7 +429,7 @@ def access_all_blocks_in_seq( def compute_last_full_block_in_seq(self, seq: Sequence): if seq.seq_id not in self.block_tables: return - max_full_block = seq.get_len() // seq.block_size - 1 + max_full_block = seq.get_len() // self.block_size - 1 block_table = self.block_tables[seq.seq_id] if max_full_block == -1: return @@ -451,9 +444,9 @@ def get_all_block_ids_till_computed(self, seq: Sequence) -> List[int]: return [b.block_number for b in block_table[:block_idx + 1]] return [] - # Can return non-empty result only with prefix caching enabled. def get_common_computed_block_ids(self, seq_group: SequenceGroup) -> List[int]: + # Can return non-empty result only with prefix caching enabled. if not self.enable_caching: return [] @@ -463,9 +456,9 @@ def get_common_computed_block_ids(self, ] return commonprefix([ids for ids in ids_list if ids != []]) - # We only mark the last full block because with prefix caching, - # all blocks until the marked one are guaranteed to be computed. 
def mark_blocks_as_computed(self, seq_group: SequenceGroup): + # NOTE: We only mark the last full block because with prefix caching, + # all blocks until the marked one are guaranteed to be computed. if self.enable_caching: for seq in seq_group.seqs_dict.values(): self.compute_last_full_block_in_seq(seq) diff --git a/vllm/sequence.py b/vllm/sequence.py index 122960035e505..04a9a90a68bcc 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -160,10 +160,10 @@ def __init__( def lora_int_id(self) -> int: return self.lora_request.lora_int_id if self.lora_request else 0 - # TODO The current hashing function is O(L^2). We should optimize this in - # the future. def hash_of_block(self, logical_idx: int) -> int: # Compute the number of tokens in the sequence + # TODO: The current hashing function is O(L^2). We should optimize + # this in the future. num_tokens = self.num_hashed_tokens_of_block(logical_idx) return hash(tuple(self.data.get_token_ids()[0:num_tokens])) @@ -308,10 +308,6 @@ def prompt_token_ids(self) -> List[int]: # We use the prompt of an arbitrary sequence. return next(iter(self.seqs_dict.values())).data.prompt_token_ids - @property - def block_size(self) -> int: - return next(iter(self.seqs_dict.values())).block_size - @property def lora_int_id(self) -> int: return self.lora_request.lora_int_id if self.lora_request else 0 From 17c3103c562e748686a3fa4bd9b43ebe98aae3d9 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Sun, 3 Mar 2024 16:19:13 -0800 Subject: [PATCH 047/196] Make it easy to profile workers with nsight (#3162) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> --- benchmarks/benchmark_latency.py | 6 ++++++ vllm/config.py | 7 +++++++ vllm/engine/arg_utils.py | 8 +++++++- vllm/engine/llm_engine.py | 15 ++++++++++++++- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py index 6e3b679cb81b2..2fdc08c5c26df 100644 --- a/benchmarks/benchmark_latency.py +++ b/benchmarks/benchmark_latency.py @@ -26,6 +26,7 @@ def main(args: argparse.Namespace): enforce_eager=args.enforce_eager, kv_cache_dtype=args.kv_cache_dtype, device=args.device, + ray_workers_use_nsight=args.ray_workers_use_nsight, ) sampling_params = SamplingParams( @@ -145,5 +146,10 @@ def run_to_completion(profile_dir: Optional[str] = None): default="cuda", choices=["cuda"], help='device type for vLLM execution, supporting CUDA only currently.') + parser.add_argument( + "--ray-workers-use-nsight", + action='store_true', + help="If specified, use nsight to profile ray workers", + ) args = parser.parse_args() main(args) diff --git a/vllm/config.py b/vllm/config.py index 876a439cd1280..e39fd7265689f 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -382,6 +382,8 @@ class ParallelConfig: parallel and large models. disable_custom_all_reduce: Disable the custom all-reduce kernel and fall back to NCCL. + ray_workers_use_nsight: Whether to profile Ray workers with nsight, see + https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler. 
""" def __init__( @@ -391,6 +393,7 @@ def __init__( worker_use_ray: bool, max_parallel_loading_workers: Optional[int] = None, disable_custom_all_reduce: bool = False, + ray_workers_use_nsight: bool = False, ) -> None: self.pipeline_parallel_size = pipeline_parallel_size if is_neuron(): @@ -404,6 +407,7 @@ def __init__( self.worker_use_ray = worker_use_ray self.max_parallel_loading_workers = max_parallel_loading_workers self.disable_custom_all_reduce = disable_custom_all_reduce + self.ray_workers_use_nsight = ray_workers_use_nsight self.world_size = pipeline_parallel_size * self.tensor_parallel_size # Ray worker is not supported for Neuron backend. @@ -426,6 +430,9 @@ def _verify_args(self) -> None: logger.info( "Disabled the custom all-reduce kernel because it is not " "supported with pipeline parallelism.") + if self.ray_workers_use_nsight and not self.worker_use_ray: + raise ValueError("Unable to use nsight profiling unless workers " + "run with Ray.") # FIXME(woosuk): Fix the stability issues and re-enable the custom # all-reduce kernel. diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 0349c3a6636c7..6882e8be34d11 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -46,6 +46,7 @@ class EngineArgs: lora_dtype = 'auto' max_cpu_loras: Optional[int] = None device: str = 'auto' + ray_workers_use_nsight: bool = False def __post_init__(self): if self.tokenizer is None: @@ -168,6 +169,10 @@ def add_cli_args( help='load model sequentially in multiple batches, ' 'to avoid RAM OOM when using tensor ' 'parallel and large models') + parser.add_argument( + '--ray-workers-use-nsight', + action='store_true', + help='If specified, use nsight to profile ray workers') # KV cache arguments parser.add_argument('--block-size', type=int, @@ -305,7 +310,8 @@ def create_engine_configs( self.tensor_parallel_size, self.worker_use_ray, self.max_parallel_loading_workers, - self.disable_custom_all_reduce) + self.disable_custom_all_reduce, + self.ray_workers_use_nsight) scheduler_config = SchedulerConfig(self.max_num_batched_tokens, self.max_num_seqs, model_config.max_model_len, diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c9bd89a1b18f4..8a2573034c940 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -124,7 +124,20 @@ def __init__( ray_usage = os.environ.get("RAY_USAGE_STATS_ENABLED", "0") if ray_usage != "1": os.environ["RAY_USAGE_STATS_ENABLED"] = "0" - self._init_workers_ray(placement_group) + # Pass additional arguments to initialize the worker + additional_ray_args = {} + if self.parallel_config.ray_workers_use_nsight: + logger.info("Configuring Ray workers to use nsight.") + additional_ray_args = { + "runtime_env": { + "nsight": { + "t": "cuda,cudnn,cublas", + "o": "'worker_process_%p'", + "cuda-graph-trace": "node", + } + } + } + self._init_workers_ray(placement_group, **additional_ray_args) else: self._init_workers() From d0fae881143f07a558ea72b2cae3c4c6dfa94937 Mon Sep 17 00:00:00 2001 From: Liangfu Chen Date: Sun, 3 Mar 2024 17:03:51 -0800 Subject: [PATCH 048/196] [DOC] add setup document to support neuron backend (#2777) --- .../getting_started/neuron-installation.rst | 135 ++++++++++++++++++ docs/source/index.rst | 1 + 2 files changed, 136 insertions(+) create mode 100644 docs/source/getting_started/neuron-installation.rst diff --git a/docs/source/getting_started/neuron-installation.rst b/docs/source/getting_started/neuron-installation.rst new file mode 100644 index 0000000000000..0aff1037d8a29 --- /dev/null +++ 
b/docs/source/getting_started/neuron-installation.rst @@ -0,0 +1,135 @@ +.. _installation_neuron: + +Installation with Neuron +======================== + +vLLM 0.3.3 onwards supports model inferencing and serving on AWS Trainium/Inferentia with Neuron SDK. +At the moment Paged Attention is not supported in Neuron SDK, but naive continuous batching is supported in transformers-neuronx. +Data types currently supported in Neuron SDK are FP16 and BF16. + +Requirements +------------ + +* OS: Linux +* Python: 3.8 -- 3.11 +* Accelerator: NeuronCore_v2 (in trn1/inf2 instances) +* Pytorch 2.0.1/2.1.1 +* AWS Neuron SDK 2.16/2.17 (Verified on python 3.8) + +Installation steps: + +- :ref:`Build from source ` + + - :ref:`Step 0. Launch Trn1/Inf2 instances ` + - :ref:`Step 1. Install drivers and tools ` + - :ref:`Step 2. Install transformers-neuronx and its dependencies ` + - :ref:`Step 3. Install vLLM from source ` + +.. _build_from_source_neuron: + +Build from source +----------------- + +Following instructions are applicable to Neuron SDK 2.16 and beyond. + +.. _launch_instances: + +Step 0. Launch Trn1/Inf2 instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here are the steps to launch trn1/inf2 instances, in order to install `PyTorch Neuron ("torch-neuronx") Setup on Ubuntu 22.04 LTS `_. + +- Please follow the instructions at `launch an Amazon EC2 Instance `_ to launch an instance. When choosing the instance type at the EC2 console, please make sure to select the correct instance type. +- To get more information about instances sizes and pricing see: `Trn1 web page `_, `Inf2 web page `_ +- Select Ubuntu Server 22.04 TLS AMI +- When launching a Trn1/Inf2, please adjust your primary EBS volume size to a minimum of 512GB. +- After launching the instance, follow the instructions in `Connect to your instance `_ to connect to the instance + +.. _install_drivers: + +Step 1. Install drivers and tools +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The installation of drivers and tools wouldn't be necessary, if `Deep Learning AMI Neuron `_ is installed. In case the drivers and tools are not installed on the operating system, follow the steps below: + +.. code-block:: console + + # Configure Linux for Neuron repository updates + . /etc/os-release + sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <`_ will be the backend to support inference on trn1/inf2 instances. +Follow the steps below to install transformer-neuronx package and its dependencies. + +.. code-block:: console + + # Install Python venv + sudo apt-get install -y python3.10-venv g++ + + # Create Python venv + python3.10 -m venv aws_neuron_venv_pytorch + + # Activate Python venv + source aws_neuron_venv_pytorch/bin/activate + + # Install Jupyter notebook kernel + pip install ipykernel + python3.10 -m ipykernel install --user --name aws_neuron_venv_pytorch --display-name "Python (torch-neuronx)" + pip install jupyter notebook + pip install environment_kernels + + # Set pip repository pointing to the Neuron repository + python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com + + # Install wget, awscli + python -m pip install wget + python -m pip install awscli + + # Update Neuron Compiler and Framework + python -m pip install --upgrade neuronx-cc==2.* --pre torch-neuronx==2.1.* torchvision transformers-neuronx + +.. _install_vllm: + +Step 3. 
Install vLLM from source +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Once neuronx-cc and transformers-neuronx packages are installed, we will be able to install vllm as follows: + +.. code-block:: console + + $ cd vllm + $ pip install -U -r requirements-neuron.txt + $ pip install . + +If neuron packages are detected correctly in the installation process, ``vllm-0.3.0+neuron212`` will be installed. diff --git a/docs/source/index.rst b/docs/source/index.rst index bdc541cb2d58e..e90481845c4ff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -62,6 +62,7 @@ Documentation getting_started/installation getting_started/amd-installation + getting_started/neuron-installation getting_started/quickstart .. toctree:: From 901cf4c52bf65472ca13aa4f996d631d00c2228d Mon Sep 17 00:00:00 2001 From: TianYu GUO Date: Mon, 4 Mar 2024 14:48:27 +0800 Subject: [PATCH 049/196] [Minor Fix] Remove unused code in benchmark_prefix_caching.py (#3171) --- benchmarks/benchmark_prefix_caching.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index c43bd9c3bed3e..a0307439cd5f1 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -7,16 +7,10 @@ PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. 
Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n"
 
-def test_prefix(llm=None, sampling_params=None, prompts=None, prefix_len=None):
+def test_prefix(llm=None, sampling_params=None, prompts=None):
     start_time = time.time()
-    # whether use Prefix
-    if prefix_len != None:
-        # start inference
-        llm.generate(prompts,
-                     sampling_params=sampling_params,
-                     prefix_pos=prefix_len)
-    else:
-        llm.generate(prompts, sampling_params=sampling_params)
+
+    llm.generate(prompts, sampling_params=sampling_params)
     end_time = time.time()
     print(f"cost time {end_time - start_time}")
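The hunk above removes the unused prefix_len parameter and the prefix_pos branch, so test_prefix now only times a plain batched llm.generate call. A minimal sketch of how the simplified helper can be driven, assuming only vLLM's public LLM / SamplingParams API; the model name, prompts, and sampling settings below are illustrative placeholders, not part of the patch:

    import time

    from vllm import LLM, SamplingParams


    def test_prefix(llm=None, sampling_params=None, prompts=None):
        # Time a single batched generate() call; no prefix_pos argument is
        # passed anymore, matching the simplified helper in the patch above.
        start_time = time.time()
        llm.generate(prompts, sampling_params=sampling_params)
        end_time = time.time()
        print(f"cost time {end_time - start_time}")


    if __name__ == "__main__":
        llm = LLM(model="facebook/opt-125m")  # placeholder model for illustration
        sampling_params = SamplingParams(temperature=0.0, max_tokens=16)
        prompts = ["Shared table prefix. Question A", "Shared table prefix. Question B"]
        test_prefix(llm=llm, sampling_params=sampling_params, prompts=prompts)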
z<6xCbSw}CN`s^J;G#_OT-}clL!i@N&0FvH z!q|iFea1|{m*ty8ug`={o45Y$%{DW3x!ZHUGpE?*l4eRcV^^0|sV39VW$_?)_x{+O zKoZl_1G4!%>)=Ol9E*>H1NcqZMMi{d;NiX!B06Xve6ZQ$g=ZpzCwK?bO&v}PS2PgiH{Rc+BFwzfn4F8T5Itv8aHf=k2 zJ@}b6n_qCJzK~xCPy8<4;rNsBi+$V6^2E<}fj_GvX8kq$pn*ujFn z3Yo$Ga|T=f^TI%gpMtP?hOE*HCrs!^V zb|lG|;19At87{FFP|`k@eXspVa*4zr0{0>0)idu-G($=~Dx6@+NG$vcB6=~IaE5FQ z%eTDYNm7`{&#!{o47Xq6b)&6?qIQZIBE~ZfIt+Tq;ct@VKAQ-w58ml|V%TC-pk-4` zJ^gf`quVH|9K9Ur$MZdi^`mqCbFLcF8#rL&|Fx*6dbPpM zyUviGq|C3j({n}i$jb|(1+Ep_2gL{3=h1T5y)YT}Datvbj|l%;5iNN-@<}WOY+J0< zXA;5vV#b+-ucPUxSZHi14k&A|%AVZ>XHnrs()R`6NXt-{P{c$+rM9ForNMnB74Wk$ z{7FnSWmIvIVd1Qio_(bBg*fPFacprEdDhuP=~XHR%1bH*YS@L`oT-VTi&7=Isab-# z5T#5;ZN{VM4>F9={V@bFwsg&z&n&w+Fl+>w!!$(rBpM|=3*^+els6epV&WL%7}Jzm zviOTdG|N>{)%%pR%T%%)q)ml!%id=~^BB~IG#86J6sp5QN1yKKHTbp!wuC`amqxz7 zmc!?lV8{xxk00xMsiLaxsP;ueH(R6BMW*Ca@GJRveluQ2>=U}mq$|tBVUA&ru`1^Y zXzpS>S7vF>K?#crt6G_6YssB-K+ey+h}?4VEa|lf8i@%J$C^!%A%Vm#PSE?;RSVUK zIv;f;s}HO?=J4iz&dF9?ya!E@Opi^Wjz&QDCVBEsRN@MDOOgs3U1Fbu#I&1=M0!1Y} zB-p>k5=;>+a?=o0epL-|ed+o#EIvN5GrpM9k`4crI`>AxX2Nu$AV*fBdxB|#dcq=G zPeSF;#Gq>usMeP`nMsAIpSe`KJ$5+OXkal`f1-RsKTkGSQ@+($ynIz_LuX3oOozQh zv{+M@Tf4R5bwzOPtkHzw$<%Zo*P=_>{v$s1MzY2!7iJgm9`hd3`NnxIQ9i~U1|v}- zu>qeB?`NWmH%rc$d_zt;yOW!zU9(-=(Ux>7rV zo$IloS>~So{CUuU#)0Jl=fNX(IrAX1y`-@%!K>Tx}XL@+IRGk!Jc;jr?srVd@yMv!9 zL(HYkI6wL;8Ej9FN*d-GY8$o=cMoAC=?mUi*6Cl4{|ZI%==2Xt47!LAj7Xv&6-yBB z%bLry9COSn%91qlGX8ArVPs8)_DmsgME*tnH2 ztjlpY?lp9^bE3$c&b-35ckSoVz zOV6=8v%fHPk!H?88c$`aE#B7_t#~(kXHDm6;Npn!$R@P3e`sB(*V9qJallcCDa63d zI-;MVFSF{(o?&RkUEO|7{B?0wkYVi6Is=9Y?YL6hNpPG{=h2Z z+b@R*8)fOStuz0#IPM=dbc?@ijK{l&<%g5bt-Ebvtq+z~t2f@2%h(&(FHa}v64sX2 z;M#iDdbgd+QioFG@=5Wn^LfaK$|SiG>@PH(IBLtx?mMI%U+d2`PVW=UGB0==hh3yL zprw4y{H!Px?9-r&uQaP=s-j-;qL$nBaKd}+h8JRXuh$H6^clNjeN6S(10#^sy3D1Z zv~;X=RtRwvXLe$c^($7?RwvS@f^#k*-o*!XM@RZk zPXpT}^6l-{lvHI@6&>{)XM)qQZI}<3pSJ4Ud#q&N^&P9e&*Ms=Nf{P`?aM7C{M4}A z9jNRxznR0U5_t2%^FZTf?|k^AZHv|5SF2rVz=Zu~W=~gC+yYBV-C)atQ`eQuntEQ* z(^ekX^ytpQtlQ9r&WZEPF0(#+gTnH!2K)2e!y)uqx7yN$ROiMbzQUV>lzLZ>1GGbo z1=-e%Ysdj?>@g#$5h&AZSaI|@e^lZQRnrgK#K=AGI7om##%Z0T2 z=l~V zgqG0Awtb@_;oKbI;NBC#o$kVgZ_1rct|7>KJr2RXROBCX4g10l=cx-Hq=zbuQs|mF zhe*r;hU=colmf%?y`k$ix#wFd3JYerE*bTBwojCPp(*b1VT*2lwdrbp$xLPnK|q zf1gnR{vST!!1tlepZ`aZfpCw3v**CqEgj*Xrx8ukAN_L-cMjZxd#frcEe-su8atSp z+B#Z*oz4f+{DBLoc2e2^1@NdJzVOm2RKI}!XDrpUoV4WSc#Xj}EQTgvBU2VP8@q>o z;P~BmfkPWpCqptf8*5ufUN-@X-%s!Y#}AiTDad|5;$$U2p(U?GCJJ^iCF5dw#qx?m z5RHtCjNie;j8{cW;%{@{On}0|$;pnFmDSbNmBp2v1?*tX`kIG_hxHX3D;pa#@C37? 
zyRDO<8?&t=<)2Re=|{}e(b&P#&dCyNOZL#Op%K{GNq~alVW9tf{>;QVE|JR-Wj`%-XYW=t6 z>(^`?|Fh};xb=UVsymuGh=Of^F`Wee%dfwU|L4uW4f$Cgmi|9T@rTa8uL6V?MB``u z57PwEXk)qQ06r2~iYclA|A3VJ{&)hv57!_5fiDAlx0hTr931_$AdU(vGoUIXNO2(1e9q+%e*SDsEXvmx5jP7S zo*bb~2@T~l8bUvL?Lv^R5#oBFw2$ZzZzW%4Ptzig%e^X6qvP4pF673nb!I}fZS;%L zTR3<`EaCtCgCduX0H5%^xDVyu_g*2P^o~CJzb%B}{Ct5Mh))z;P{@4Y5dQZMJpT3n zy#zdg^$iX_!MCjso{(p}dpM_T@3seh@ zx;~35VHf_HtFR>sRyh4vm)zLj9C(fOqrtgIr`e=e6wF$R5{E$5P`|~m)Tgv05Qib(PjJ}7^c*Y5Jjt~vDn~zf^&H_;!|qi z_7iCYwii_My)EFedX~i3$xvcGesl)EI`$F%3CGrILE!g}^3zBPg+V>zl~QqP}R{hQ>hbM+7dincZ@I z2gkA`)wQ(~Jnmr^6S-?OjsU@AY(0(t?rXnofH%Pkx=PO<#_@ZJ+KX5S5ZocU4w&;M z5qaoW?a~keP8n(89xKBSA#rm2HHY)Hic|T@ls1O!NG1|tBqaweHwRUgZu?Ep!lFrk zB=m~ZtZqi)8p^-ize7anT_1^z{!K3#DPbCm5{;6?tnL>euk!2lC`F=q2knRm%#*DV z&4E~E)hHTyhh!Py?zU$GxCw)+8$c zB8wioCD6Uf5e-I-;#ZG6Nxgo(?YC!x|F)ph^PA1s zEV{;oSbSqeG}$9K@;P+*qSN3z7)`IL25?-J;chbtT)11-x>ODu%@Exzk?(t(a2&GY zVDcU9@3veQ2Q-1fOKHT1Eu20HEHYZ>%T{emJ+cWPora;w6>+OI^R}w2t+y54s;kZX z&l_##Yl5A3zlNmO-yRQ?s2e={qr=F=00|;rjpF<*LHtO7*cX`Kh#XUX-iW1kKUQhT zaoFhJaY?!v{~FF?x8keQ;8YN)tGRxj{~4FDFjqb)5mZ6QVUb=Ty<7sPLfi35#YOu1 zc%;SadM8gt0>q?UA*NoS(lw|nak_r}fxb^Mm4FI+7;<~m^L}@_1X^4=~j^g=IOC0P#ptiLy5UD6nn=*_4Xo0g1` z^`Di)`K=(Gc449aC8-J4Xw)SE?B;^oY4mL--p2Jzxv``%?D{N~2TIGGlU)Ypetl|O z{gr+A4W(&XGrxX=(`%>nZg!g+4A>;eT502fOj0h1X~Y}T&B25~j61i@`%Y}dmy)4G z(nN=65|3j-K9P9P*|cb+oZ9eqdmc7##YFGA(08o&MSV>b@Fd`s+SE2+#B`7GXa-bM zAckChZh~bNhF5lV1b_BEGA4=jwr}`Yo&8$0!;WsOUZwpmCNZy*oxpU9J#?~w-o46d zW*CBU_+PeOS-t%4+Tz*G75d|JZ(dyME${ZJ3phI6Af#GdSDKHU%Pr3x0rWO6jv~1- zvYxLQ3THi@jsDrsSjN$EcWuRp-cy8FO{0*!&6qz`ph~+|IU?woy;V)|Hru)%>LRDD zt*!Bvnh}i3veTn|GdyB|T;@A?d9GD;~xx!=s3x#`KR$d1=LBw z`Tl%Tt%xZvR}#!1>RRx)ml6u7?I_A%^E7V_*l}E|leMgYebSaxXap@6wNQn?mEQDU zVy6EBEHxTf4&(PC#L37UbM@@IaNUc7TT&!;o-vf}Y%r2#Nyg;++}|vhy7zY9-(9qt z3AdfVrSUD$B<)Y+g+-EyxWDi&3LURiaHpYktm|!J-q1HWh~cuCbEBbb3yo#gEu1RS z^dyPe#VGTYD>G=G1XxYf*L>7X2n{5(wX=5AjaZoi3-eQU&&^$K`A7qDefDp7OQR(aNy;P7By>Ti@;QVTTiV~ha}YU1?O z3;%vB43a%8cw_A-vXGbR(Vr^6+AMd+7&YC%e2qtOb~>S$qzH{@kP;SdDK@Xv^VrEL z)UCCZVTueu!lsc^T5NPxq+&PfL~%ZBzMz?oTJEK6-k=xRN_GsrJ|19=Nd@)DO%yS) zHlIc|z|QBRqNIxlwv&5EJekT_z}Zbgw?8q}E6s+Yt0E?0UszjS1*ou_4cZYgGTLT& z?LF>Wuj}}PSqqkFnmOr+56g@V_RPT`ghUDp^*q%4E%&D&Bux=-%Kb#-qI1ripvzy z4#hLbuueeE7RPUUgzgN9gdqG8A?xH(t5D%j8Q@)q!M4mayY&P!Kq-CozB#8JFK>+q zc6Sc%JJU-b;-2tgyOx&huCknX0&}8XTWJe8&sJT!-(dBT?vvasKGJb|MYZ8JAl+XK zMwQ`Fe_Z;+l5h7-ttN4}G>;-zZ|6W?k1T$Lwi6k{h8}Tv-==|3@)ev}48NV+qqvkkDnU0O1tED%eg!u9s_}Ggztg_`- zJG#KdKu`!^^~Su^sdrjIr;5D2at)AOV-{{)bi|@P( zt4?&&Z*nUiSL~1}Zfut3t_Xa_W@}{q$2&}S1N?9t@VpB9Z%IW!LnIC#3+vaI$Wv6* z%@Vr58iCff!b;X&3S5gmefjEp`H&U0zOV!WSx4Yg)fh(2bKXVg#)au@(zhuUiZs}z zN*N;kF11hD?&Xd%)9$jD+_zHJheDkNt|fwl2YD)uy3l%Bn_+rR%l!PdUw zc!JVraK3V3GnJgonEvSVAG!G7UZOi53NIPuNpTZEI7_S74(n1mq-}d|Xh`7` zuvL{L^SiTc!G-RhhJdNPYsW;`R>dXIq{FEunwSj|+#WX85HOjoH}adO5i5!{Z4BT)oNSeaP2%rA=~P$G)z7T|LcwY?$w z|)QM#?BaUs2t6QKTs>AwdsYebH){eX|R? z*MXgt`8Zn-RIR!+#sc>?_amOEoQo@-7M-s5pUcx90$%ic!NmENtm0z1p(O5+mf(wj zY(gZk((si&yoZ&NEdjc$!b+&NhsE60Y%?qJ5gXfQF^MQQX12s! 
z6_6YU2)zWkv2ot7p5d+Nb^Z{-Z`%mCzM&i@U5sRu@X<9k^Sq&PJ#GX191nh_b!{2{ z$sjvav-LX0I|@2qTSmb7KH8Y@KmK|sFcG_jKHT3E+5A0`?`g0-b7UQ%gFPOrv;##FKbByJaImR02v-dwhDh3|iuirIwA2S0r?> zX6oq&yy8jc8U46Oe||%0iEnPqfrjy6-&)S5b%{R#6kI1mU6R7;Y3a>sUPMqlH(qb% z?<+Ly2&Bi}?-wld4cT){`k^UtTdd!x0eEua81_uh-6C`gB46O+uXFjxAKMrI<7~qr zT#I}_`XjH14mfW-PV+N-8%s=1SVZG}HSVOe6&7<`Eri9-~ zxZ6Ht{xhUPso(vYUe@+ggYA61qj~=oFWH)hbq%3rsa`^4t~xF8_gA5$LaDFmy}yKI z&`xlkszyD5;m{lRhLd-14kZz94aL9i;3O4j=(JC+rnrLk*b~YpaXAYp&UIPs%vR>@ z@&N7}`}x7*u5%*!J~SpRe%n`W&XJk>rU=6&=jCv!zWU3F60jW=^(?U<|f+N6!!v0i6=9^7d4d z)#f|4G7|+n_og$Rtz=`{89yXWyA|OxK%i*%z357GYc(8_Mr&KnN;=&G9%E{-`HM-& z_flj*7W!!^eIj2-OWsvmPDlvc{_5D=f8E!kwfmq7F$GS;tD$Z&Ri0;1()mXkN&SYU*k!g-T|EP=u`*k`WU58A#TW&(cdyOt zH^TrRit?LhsrieK2%`XEEh0=S``Z$%V%ar*?Nl7%b=k8(wl8hHyDnCUyON}lPc$L9 znN(FwILR_`T>bU4V+wY$3?{iAmt`#=5%4f^)af>6TP$@vUYBO?UBQ#Ii~tMtN9Y1maC|%D~DDKd`^d)q=|a9cF48;}+x48SG! zG{8E=>=w}0{nz}vUS%^Z`VCa$@)r`DQe%`($B|_-3vWD3qz%RuON%fpveo1tXFMUE zWIMHyaUXHq9?hV?>O<~3X?sjM$SXwL#D2O>GE?;<=r0)c6D{AalGgQ)Iay`>Wg`3-wtY_Grbt!CMR(jw#Q)>um?(^@`x`HIX& zNq9uw2vA3YZ6&F!-QlQ)1K4umjsyMuPXuUXGx=wm#vIpQdMsg&c<7wTLt+|suNci% zIVYB$Z)}n|4oW5Un$)nB+vdo`3zb=9O z)ee3n^KZzGnnqYSx04_lO7Va13IG3%{^CaeKOz1W7{K=%UVrS%bGqqeR|vGr zxo@Y1I9gv268Z4-R|j3$tJ_J&e&=J=v^P~z(XW4Xp;+Q`ScGRV>X!NdfbT&4DdafD z(Lq1k8Ul^85_D*~GaTY!6>>f{rZqjOyevPNAia&2A3idU(4!UFIEyMvj@lKu7l6&G z6~nk^?!F)WwkBhEzZ|{4I|q9vIp}*|6ZDQa7dBdKmYjkv4m%SzRKn8lk(Yf`b;)a) z8EQLDT&tE@z0N@-5J9@z+wUhI&z%}?${D9wt4=`>`|KNzDjx;Yo28L0?FP1Qm=Eq& zx)Wj@%g+WXeelS+PA5b!e1`fIbPZ0v0XI~bSF#?$CjjoC2~o*dgR@nZW|;wgSfvkL zUGv4s+n==g!or(Qn#aa3T!c3P1KSONYa#Tixi24FB>;|7KY&QyJKyJ!3uQs@D6aAZ z76nJp#(Wk9WoI`R^UB)-1%*k2JV;j>Ivc|r^c{4*E0SVB8$R60-VdER5`4T%DhSz_ zJv<~-OC*{=GU0VSux8S$du3mf#%gqqY=4t3MRjo0t*GSZJ7_5<3YX9lL7eq?=`Sp%~D z{D^a(<&1T!xxN1W^q1uM&hZiR&+IXK)7z~dcUot6Zul4uqpks?r&et}t8&4U<7C(E z1^8*1;~-!4uLPWf&bzGu6wkP5Sv-{tf~D@7Kc^6Skg+zF8r}_+G}SzX0>GJnz#0=^ zrLKodGI#aNryy?J3!C~XzZn0tG;AL3{HDx!MHbkM!{+ZWBK-}(n!Xm|fNNqFYBYv2 zy3MEoXH;0+T~hP+9ZHy&X79!pmHvJ8Dgp0gq*ZyPU`slDf(Rb}E`b9X0tv!%EFv}& zDWfuyw;3jc=m_>$>G$0kF?5uFK+7ER@6OVeRgUG;<0bbldp};k9?cH7e~Q4?HCL3c zsRJKeRbx3pv++_Odvpf$M&-HDBLpOz^m{z4xa@y$LR$DzS%=ltl^|5Nv*p*u-T8vv z*YP>jWZ2~rS#P??!--I1bozdAq4s|x!*Yhd22bh+9shCRlz0*J;-VFFUl_|TUW32j zF-ObwP{=YpV!Ni7dB69gyz7U-WI+(3Npy&q_&$0q%PglGsVUGywC<;hzdh(Y^q}GS zLf2;Tx=g)C3)zIrb|D`Kp}XWT728Bv4~~LFM!*KCYeh{(GUB#mkH>|j50JRSNPaDI686{ zYEgrmBZCXfrTl5{D{H}OzuD*V@U;omJFR2Zj509{;R`;fz1ali55ZPYlCXntATa;x zcRR7%R}ErR+pRhCq^lZ^wRePHd_(488XuY576@XGC3MrxM^fM4ULJ*tjQbE16^m(+ zZG(uO<)&KzT1r?M#ZF%zXGp6eW~Cw9B&mgtfyC*)B)!#%;3^!wVdaXB-IDAbYHyJL z)ybA_(WX@@goDVk!e5vi@$#1W1B2y*JMk0sS3OCp08bpq_f8igs)C|OwtXinOw&|D zvxS2CN`yw_>sw_9yztiSexVzRRynQTaKkD+*69m>HwI;BKAP4HH4uHMj0oNoYs=k@ z;!jOy>h&RL+zUW%>j-$AEZS~Z;S<*J-ZfJp)%~LHGs_$QiJup!j}0%;hvU7fu;#rJWY7L>T`M`XMX~h*JhBP5=M^ z7+G|Umpp<`7`O3!PvQ^vLXbQ?iu!MopbpJWf3?58J02BCTxka=>wSWQ$AA2o5d}xX z7d@^0I+FLFYyvB=GtV`M@bw>3fHs6OvDyH>0`ORv42+<6x&Afs!{3q9lOHw#cK8G( zIe@u59}*__LcI@ z`_%{6Y5T|f#zK(;Xz@M2FW>h8I|z~;M|u$i;Amp&35!5{qVBvq&FRH3Tm-gc zbZyt0RyxFDQho$B@v&rTO;n&qMg`&S(r2v~Hy4MRB|6nAyEA1MOt;XfBKN|KJd8i7 z-1g@y4OC3-$3vflM`3Zka)LTvIgD7}!B(Dy2Yai=njjV5S(Wj_<2Op&E?q#UWWN$} zv)HtmBFb;{$4roPDEwiaKOWG_Z^8K6i*8tz5#yg;Y_MaU+Dh)MB+1B(XM&b*fY_Q2 zAAYU6%4S|;D1p7lihKSKH&NsP_K$A&y}?9ydjSGMU<;1OpjO|r07+q*I~(oKoZqg? 
znh$*F(o`7;1c*{`c3``%kQ<`O>TH5(`vidUCQ_mS8Lcdq#yvJ5aZ}=^(Jfg3 zOk60&H$r@edT#>Eh==N zom#VTIa3P;tpWM)kF3Io4le;mQrnd{JXqM0PNW9G^L+1hgVWCHcCCic!C2O(j5LKP z8zpv~a!a!tgOoh52mInuqAwam9~*@LRxHmeci?c}^Ps-x z5=g5UQ$)1H>&0bXpjsUQIEL{<`mg#Ppv$BikcMHyvw4Wuw+5X1gPu{cr4q(H@;_*G zCnTpF*^Tu{^SN`UXlERwj5 zcUOggr(;&V-8yEw2*7Ctgy1ISzM>h5EEAJ;x$>}BRQ||Aq~Y@>^j?rMYtt_g$%gSX zpvC@=ZXjEY6X)x!Z&QXlL+$c)WjXF)F^-cPaiG%mDJ#o`y7Fs4fYbmmstF^w%cWE{ zb4D(qpw2Gi{bEBk0CG87Y2Lt(@SlV<+#EFSCmCHWm89~scQ5=ultTJko{dT-h2MR=Twa4gquAn3cJ^62GJV?2*8AK%@xFa{TiK6jxD_Nu z-q1<1e6}JW9*}phxlj#U+#Zf?K zKRRp|qdqoPSGgp|Wq%GjXH~{ordw=fKkEuqUljf9_@w)(>p13)u-Qx&qUEcnJ6WQu z*$mKh^YB?w=QwMv^=#yEqk3o~P)JeSp$5c0Q@EOX4?x3i9Z8%c_2L^=zY7oyJ}QoS zJ43*o-YJ;&7)aJIzh0MH=CjyaY~&_qd-vV9QxLyhV{hOAZ_Y2fgL}RF!ZT$}PY7Sd zvO211?voaj)wUZ*c@y7U?9ad0f|@b$rM2P*8+rQb&I?|xM+IL?f#OEMHzT6Qp5CCk z*Wjh5PM)_dvp`ZfLsF+<3D1f(Z-HyR)yGG2Y<;T%&<~coG?v>!G%tagljJqMo6i_( zFM)l|ekyL(U6oyYZ8Q$?P&mXS6||O1els@u_AbO-dUj*_ou%=W0ON+|-PKlQKe+2N zZucWh{&&-h2ixZcPYxS)St_4qmo?qr-Dq#Lz5R(+2iyCkn>jw*+%!Y%VeqQQydB8S zJ|U*!R6g?}>h``^jQhcwhGY){Hi=c6-=oKg-g05>Z0oK;-eXyP*f2#%*tXE{ylc`S zpEET2jf8Du6om21sSWILV5i{DfIaA8b0%4kZTo*(96cs4yq}A^H22P*8;BWtJ$)n@ zM#8C4qGP(@&H8CnTx6C!? zvYMoxLN(V-Q)wHnAFG)sidvdk^|_GdY2icVOe+tU0T@>s4>j|L&*RxJdr#K4_^91; zFHn)Czjy3k%FPEp!8f{t)aJ$QG<&gZsF;-43z#- zXl8$A=vdI5eiM+{&>9HxhGY9Rn-zYqd1^JtVD0V`)%9VY*!Q9L#y3`@Gea6n$B!SE zUA>*Ju}RtvIMyF;p}J5M!2jl5n{ikymCOj!g}&ILesDCqCBgM4!+h8&!*@RmzzMHd zSJwUemJ0TJwy&g#c>1%mj^h|_UQc^oeMxB1@3r?j?k_j)8En0E*GKH{Y7(0}!*cB@ zaA-5%pxo+bZCmexkn;g?)%v1%wSf%a8g929Yiif$1>|N_UY-CW0#Ux&R5$YpJK zmBP?NZwCi@)7MWOwS9n+nTg4RB>iK2lY8$3SrXfx2Jc*HTaQpd(YdkxWWAbd@bXaQuWYZ*JO7>+~Wi1jqX!+ zx!Sw;lk;uS8}9E&;o$D?r~6!-;ovyTA4);@Q&2DUJFa>pEoE(T#BHYL;sp7FG%Oh` zNYqrtb4YTge-z|QQv{WCsx)e0dn?_PqEM8BzmmC-*939KYWd%!9Ui4kTmWEf^M&h$ z6qQIHjPe7;B z6K-~33WfAL0eb*%Gud&t!Z7B}EXhXW!92>oc`$4r0S#7ebW66DEgiMW0o1U(?9sKU zo?5**UyoK*2=2&-G|rY(PwUugKPt0j0@qe8%Q!oJhkuMSwUw6!(V>?qg?VmVaL>f3 z1^iMJf;lPkO@k+Z-bwSS0L2-1`DwXq{Rl{Q1g}*?#YXfw59`Qwfm8f1F1sbQ8v0ya zLeTuI)jA;UZlAUqcsmga)Jp_S$nzy|+32J#by&Lp@B}Iz+;q6cn(tsY+}V1b`_-Ce zByLU^ezw>8AfVEMef|K*8>wCbW|9qd=9Ld(2C94xIl8M_zW*A`R}NE16Kqa5SKN2`!dw$HpE{SGI+IQK+DMFOGd%!9 z#zS(3(56b6$n11`tibTtp>k?}h7(P$2nig$P@SEnU{?`UV8yvTB6Q#M%D6kk-zNI! zV-2C2PoxmH=#8~7ArkJn6(OKn>jl@M(?mN%QL)OZD=CVWWAl<&A$j<;!$x(c3pC}VT9(y z8R&$MgAQpK(*)uor8m$>8L5g>H}qX*bJE{_d1^HhlRGZB8brAohIauBYUb9Z4{km~ z1Ii%u3@7uIKcchLrKc}L_^>K=cJ;2zdHUYC{aP8kon%#ld?I$yicS-B$UY;rPK8Ql z*C&BncsNNnUfZpFZ%_3+(0&7Cma`fJ*TY5o9aK|Rc+-Tr&txS+i4v{}$*9)=zv{WV z#PMRumdL*D$Pd~yFBJ>dR-U4+)12ks<&uIu=iI_uD&4MMCVo&xvzD1cyU(p~nKB^l zuS;N0Nd@^!dnOe4n5!AihA}Lq+~=Muv>hp(+D)UHHi4c#ag!{#AFh=VSq){W9 zNJ}|=$H1<%4OL-JwMh~EZvl~Xey@FI%DQ6!2(#2PT&*8#z-HAB_eq=p_W+SpN@Cdf zXj~@;fTay}r0&9QfLhWVaYnDxG0Bf5wM)F5ww%O^;bDUPTGSeBm3#+vlu=%Ij#^vN`vFo|R@Z;{#ry|<>%A{tE zea-zj%C?jUK+uObr0i{aBi+>j(k5jR6;;-F-6vE&I}_%#U! 
z@;d6$f&x$&&Q-q%Uco^lkyl^?nX!+fE>Jk{8C{-R`C+(SscUd- zg-hXtTm)H>zxL{;tVcNK=tEaD%Kxuj`xWZ8;KIVbW7XsH!-ZjaV_;>3KeHsVw} z-cKv!m@3G~dHYv2?a65*?fAk!l4)mTrH`yyC+GS0)P96_w>Z;Hhrn;(v7fD4?{#KHv-*@TRN(wa=GNzC8IVBFJ{5MsWX-orlHw2+Ny%>fQ`n_deU zH||E;y-PJ!kh@K5jWt0Pb!!tC$SQvf6T16>yhemjPm!*T-{&e^Z^f)Bt*YXrl5s{N z!`QLtx~xx;+izrk^wAAU*K)O)ME$c8he`*7T3nXXM@8d<*Pb%yd6a_Hw$00w-7j}Q zgcS88(cQ+!2N%SSTme<8S&zJ;E<59^o6^t~m#wgEg`N8&m&+*jU!b!#eS75$os`r5 zwat&PILBH}O1RGyZyd@)W4qQj%bqd{u3^rOg-EItWQR!sn$e2`U233wZxA0gY&!I< z(qa8)6F|$=#i#q+C5zhv89Bp0n=_Y5clVf7W>cgc&pWn({gi-5<$4iM9u>tP+d@@^ z$5kH)tKUjxWRM>h;<%HMk@-ZGB@%gZuWA%);XUsJZ#KCeH&J8tnreaV z@f<|7zUs+{Ai76jHXIBv_99rJSiRKnd|%2J8s7%|T8}GMVEzSEH{!XTTPK+CQS3yr z^NC#UmNv5Bb2Kc0TVW~_7)MYZ=s>zNG<}x!irj|&?e*%gz{P@=+0+_fTa)o&KnZI| zN8oKCk4(W?J36aL?ib0e=iAeeU>xl8tFh+;>w}Nzu}XG7KpeyKrXim=)mYYA`V#!0 zar1W}T&O$9Gd5@mO|{DoHvKHdk^ZB+Ry|Au+f~&)c-Eq`9LH`;3?B#O{MSHGXTH{h zi7Ra!@f{AjolFkCqt%BZos+~aDGuUMDe65BNSFYzf`hpA1Cx2M!#c%4Vjdb?us^x^EVfEgZf=SX^Kob9=iH zbyE2w8DwyHyZLOa8uWSVHd63nAMC@|Fx{%K!#y)`I0}wSeal09MwsSgRn}Z{LfW4> zzeM+bVM{S^VB#z*Ob~i~w)-M@fqAXHsb*Pbznk%JQAL&+7wM!QcT3*BQ~8#q#P;XM z7olaG)3828$aByQ{(kRp2%?k4c9wJG<_UZv#5`}a3z!Q7qBMKrAhZ>||O ztr*T&Wjk!*w4^9glf|)*ry?nBh-o?kr@y6hGUZ%q?+%-yXsO0igT}R1q-j%d2AE1V z=fSPSrbU*KE-oRllFp4;u+1V^Zb?V%P*(Ot9Fx^N{muv{B5#av?AT0|>sLtY@#(d`iKh0OWwa6tZfg90t z&X^>b7*ntqHf|kiNfYGVnmvnA-FWjJ-$qKZvT@Io&q{ZXpKEntC_F+!)2Ah(f%NXz zr#tSRuDQeQ%z^uzftWN~JvABcw>KB9_xet(dRkxY>+;GFTJO%v7BL~e9&PyqrGPXK zZxat!_A&T>32%W=0JgS5ZwCr`mlatZ8H>M(;4CvbWU@YaJ5eeHDJh+Z>siTep499O z|D?HBw|sYAJIomIxqmh0Ej9Zy5rRdBC;Kz8v|i2S7Y%3p;C*fz{VpUaB1{+vQ9!pCaduhFe;o5vf*6v=c|R)GUAOu&HGRzl6@a$)D! z)ab+Dw<#fb`c~6HOWs2c(3$OKqU7~c)y57}r>2cq-G%GjGLTc5_6rC@-3cE%Z|g_f z)0Szz`MafbGSHN9TtS7s4M)9*fj3e^;)n=c@MW!jFw(mKuZT_@&)%Xt^ZhMBe zXDdev%^q9EHec_#Y`$}te)oj@6YWy52!G}4{aS%f;-AHab#I8dtm97t&nRa(Z^RE( zzs|rU!}G(uI-YXlsDDheUV)q>E#oYDuJ}D(VD-5gxJ;cZ=+16?RmWglX zo-(}lT(@*6_sVoO<&qSBm~4~>aS6Z!CYK-FW5F|s#fgh%HioN~bm(6DW(1@n5|t9v6~GbJ-)Nu1?1jiYjb;8$h(tu4qPG0o;4k}J_e8JW4< zPc%G%!wI|z%r;)K?;uKW?snWq%l+|V&5PQrZN0E#&AFDo1Dy(;5T=r*_arcAf&OzdY4kXgPOcBSZ$)-fty@keiXsKUp^J^}SmtjV%{ zUw5qt%FST@_8lR@&>%q3QZqK|6c_dQ2Tz_5V}LV`yThC^r;UCI2y@_BmJDlffi(LZvx z9=3-`izFNyCC^5Gp{ehzzV-B~9`JGLyjFhPC z+i-ue#!ic^m-Jpu@W*ONtaHtMxi@jGw_W;x$6s&-l%IXKl*EgX=aak2kyGf$n~gfs zMeeEjX>BrF6fT5;faME^NCxNEkEqnEW+sO6upTU5;RDp(_@zVO&5w!=DS}wwy}O85 zDG&~!;2}{MUGK(|_6}P4OB~{o{ z06H*1L*I*sQ2h*Lf2Mnyg{jU9YqS(;u9k!JDgkFQA!nnefWA@>C|Y z;Uv;tQOGfeigZ6W+AO-ZY-7yqkggBMwTW-Bz(urAX|j7hO$$Z zq#8o@U3OAPijvfG$MSuC-{;le&-2go-@N#I?&scf&pr2?_j#WK2qNFCtYDw zu+UKu+%%f`up8P4$*1I~kH@;#Ci!{+G@%zYY}|ynCbC+0Ur+R7@Eh@3u|m5UZZdM3 zs@r6{Sd^jATMb6HfSs3x<)6ByxwDH9X?>DM?!y{lZYw{aiQMvV*^(3pX}?GroDdfG z-hD|`{9E5=5G==|81c{NqFC)m5JGC4)lxdI0-HBWy$3tpfS1yn0PS$zm~ZdxhN2bL z+${1iDJVr#dstbCow&Qt*r2LL%x=w)K~Qj%VO0qZRo^4Q_QS?8>Qn~Gs!auGnHwwE z%T?a1P4V14AGYqP<}+4nKgWE38p^7u*x2{4}{0 zZ-7YLB7kOv7A)T}8}md;E}C@az=yKUDBq`4#Y$~U-BvCc_kO_w1KV=<2Jrnwy`H>0 zeXpDP_!$PyZR&6Xq3OW$K75)9Vq;rrTjR^scS6IJOB^_O91`{pa1RH?LkFZufrAs` z`nz^8sK4(jw~YWrsaG?-39-t0U5EQ~w6X(DAUu&&Nmc{J zFdf*)InZ8dMCsNX&#NuJdLS*%YP|=Xnq*OWugz#FHut7_lOaBPZ?1+%$kD?dVH)@Z z9_5|Y`^Cx_2u)SPZVsmCAvUJ(CNM<)4w+lVcdd74-wV_PiGKvC*p+S&lFB-^Qho!u zOXc9Qaom3;0pIj1)R@$V7PDk8IIOXHeo)2BRF+B7}o*_UXv~1N5+l61N6NC zK%cb&-moZTSw^?buG_UBCW*#=-6D>j@IwQ;ZmeoPyGIf%K#W}e5~qzc z4bYeY<>@z)&HkYL8wJSoB|NB2BiGCT^KY4-LSKu6n6UTBkNc64WRx9@*$&1DC(uDW zVXA_X(oq=*u*G>sGhCEuSHcpm;##m``=^17;wI_4Z;3HI;Uq2ii@NV=EeW5XB< zv&cBJez0LF4xf$Z0neW2&?Yp3N2*dT*AJ8Q?dH!I`tVDPKzc&9ly?q~*V0*MQF~P?a=2wJ3oA!LKvQ=k|brL&K*dX 
zO6EG-cDHx!#7r9B;RN#bI!K>&q@h9@r&+lJT=M%T%&BGmf@#!8gpvz$2zX8a%?r>8?O<858vS zFPcs%0Q2>zMV|;58ziF+S7glIT`a1>oZ6pu&m_qR1@59kmOXbk0c8O2LmW)!Nzvo9 z!ph^@3g0IDo;G$1J9N|LAw`3R?E=p`3odPriO%+;LkRg$p?xW5f1&M>T>dGq+UiMx zSe1?JCa7w(%9^Z`Me|X*3t+Z-5X)E=m>iA2Jo%V+!7#%H$=s8iifC{)-W7fH5<^(; z*aZxlWt#scN}H7TRte~KomS+1ahYa7i+lBFX_x;n@d<=B9Cq5G+2M$R=!6=H0)>8i!^8Ae~m z)1ybRSu^F@p)s~^nS&`FUS>)tO$p-&NmXv##rAl;c#>jeyfwsJIDq!^R@2bJrSU6y zLXJ}xkUzGq=k)5>j`GL8u6mAw>1{NaZ3|DCFATPC$Eb;ZgjPL$q*!A1D$<|-@~Q<( zv7gn%o0s{!Xf@3h)u(S}E%kTAUqdeMt$=uz{Kw$bfR)fU){1xI3)VK`3r4}E9lG06 zIS#5jgx#^FZ02$Akw;p-f}0TYt%kQZXXyg~+eJ$Sc8f32r*8)Bknj>}8n|$a=)_js z_2#5ovF=2qafW;U$r*kdaf!B1Z+gd7m;KDE@kOUm*iuOsTE`t*?z22?UGnClx6FU~ ze)C97+1ly)M7{bgeaNKjW_$VAmW{3r|B1bw^_K|O`4lbn+geAhITC#rDT`R`cvn*6 z;526%;TxB66Hr9>ZK6;X0Mi7an>n3v-u*U1N@*F+{N zPiDsB6bp9I{rO)#2WKfhHE|dbL)98MD&5Du#RD`r_9XJ&7HsMJl#!uP=vn!a)s!AI zqB+Q|*(#V`G=#4^-F;yAB>(mQJky6~i93bz*(?F!n~Om}teFmN%D$fM-QwJDDSZnt zMMqI1a|&!0JGZ_Kx)98unfu#jB>q#rQ7`F3ll^Zo*V0+uZJQ(up(VZ(AiQW68(wzl zz^3o)e>Gf+!s=72dE+O3lah@O^Pz1#B@*)~3*c=lU)c}n)0O5vMi;IB=i5&wcC9;1 zC6qo6ud~5qeExA@4rjkVPaWE94AiBSb6>j;%;LckdQ3@{E;gm@k46Ma&H^Lo)gAel z8QMD|WRz(-z~Y*G#>)N4Bnk_JspjKp1NTJqAJQWIR5k(gL5SEh9Xdk)`ab~}bUbWk zUr`P|bvTNf2$+xlOWoN&?Nl}%n0I5riSU0faNniVC(WnmhBn8ZfjAuRKumZhWge4` zNQFO+S_I}JbxQK?p`&-;BJRB!ZVZH{lX{){PoqjDf%)Xm3i$jnrQtuz6$zZ2-4)g{pPC4A(n5GkL@e$hA2CTJmP@`S?5v=;#Na=vd2?}D7*R6LiG ze8SUi`Hyt~5|(MI5h5YOAs`QzYtwP7C5(IxbcG1?>pUM#Z@#$=Ey$P!kBWT*T(Q$H zt!{1(PJ&p3vp^yA`?~N1z-#STyg;Ps5AsG?PeEzvy$*90;RUCX(i-1u89o|!|CZyp z_4D%@NK;CF9tQ%L0B+QR*R#(WvYsR7YP)2{@sULfullcEUj6lBxH);iK}$Cy#o8TO z3xEjCpt2{gAkRgM`_)cdY*PYWz~@GHgI_ySzZ(RSEn8iD=n;kSVgTDOb7)3;0k1uf z6h^PR6F&p50R=Mo!CC!~^G=cDpa_-t{(l2Ng0@NUPj#d- zK$rqk?1K@euY)9pd;}$aK0ub~> zzBdS__|6muw3Uo4%~VRR1(D_M`y~6LO@i#Egqh7OefxGJ4C=%<*jz#X7yTI5OAd1;dKXAzbZ4#<0&Q0 zz5!6u{|g99mlMx5=A(mAdmPnsNw+7o_ih3vas1Zr-5q>+-vc=xCze{q-yh6~q*fmS3l@#rZ1#zLI;+C%YTudw zw?@@^0zNXo3H(Y9)~EJcI`6lavaa4lA)A1^b~U3wDKUb0r^H`Eo$F8oNWQ$kbv*Y; z((&fv&t`)_=7p%ueqhgLG{ugOvXFWdtj8wEc->cVJR;o0N#=mwUPufa(n|t4b?Jm( zAj6kT%>N0Q$vao!LZSzn;CA2V{v8Wn*NvXN{%%osMMxdrk@XYjD{`)aE|t~(22YdZ zE&BJ;hBaQTk+kYMr5Yk|)l00l*Hy%#T|ZK81FD$yx~?Z3yH}m#isZ${o9PVIkHIWn zp;PQoRvfZ<4|5&forU6n)S|QL~`wj;W z!Dpbit;Oz@>MGoZc4CZvIWfaiW+j%fNWSGyDZ4K}6Y^*~LDPwaR$$UBO+UG_Yg?S$ z!6bu})>o&CszLr$Na@Ivn$Q^45x~Sdmu&a8tb;eoX?^%f?}dyA4vqsmSUAd2W3jrJ;Q>N{=Q>acn_9i3J9kK6-8L=%~LZzS3a_P}%RN zJ)(Ne-|K*+__Ml(gQc)p9>dByJChTggo8;gnF z@UFsPdGfMsxpJSG+AH*H>-I*BgS6^w9VCWYNC02E&1`2KmC)gonR%4|s79Z5J2Tt{ z=nU?iw1Z7loNETLs9V*(Mgpni0)@9ef{7ZMkJC7UTI6!M%TRT!L2N4b-w6OGv^T;8 zk4Cq7?piLSZm{5_uAPDzTLQ2hnYMa*-eQs<9s(-o@Bn`Q#tpR>QDO?R*is{ji|;q@cF51jrBa(2kf&1X8X8Hk zL!siSb{$)SLHNr`NPpH-aWbm9qZoeQx?B_r$FG3~CeBu?p3zc-Fxg~|=p?KF_qR;UawM;k^o*@6BP|nj(Q0Ih#T0R?KLL}3{J_s*72-vK!ux{Tf zXVWv^&KC$klwnmheDahlHav!y4-W>E5;B~=f@k8P-gA!Tv0-LJeq_E|JH}R&1*U>{ zsFD$vVErQ3y26nb9UH-${)~5!D zr{B}!^W-I%-cFX1pFPSZy7XzKn8f2~_SqZPg`F#hrdNQCX5K-BjcHv&KGQm(z^sCj zYHgA0l0z8tFt^^xbsf&Ts1TyO;8{>f$438?aHaQAA^=*G&)6GCuvh4W8@h%Zue@5( z0o2LKdpt*96KO1C4_TICcL#8x57vr~?P!l@(!8%|3Fb*fex`*nu@D61Lj7j|4@B&m zTv1A+tJkRF?bdma&ypyP2DbJBob7Ch8F6GPUPENSNk#6!haAw#e$vhJ<2aq~oLu=T=r-qxr*E(u7zM!a znhjNLAQ8gmS#w2uudv9x;x$08Feq7~Hn(Mgvz= z+{&C50`Yrgqgm$yTy{xgaJoGuYt-S^=gECu+2~E}24tr)gMlMX$vbMMN=3y@8VS#{CC5g3aI&wi zF!2(~T_C87(Nx)pZV`BEwoONLi&r|b=57w+Voaae{l9Q8^0TCm@ru9Im;yQn&6SE7 z$~r#y0C4b*j`p_unnM{-R`P(6IID8waXj;JJ_p}+0d;Mws7^rqK9&WT5mLuAE1f)V zQWI>A=BXS8axq22h2G!jS`@67Q-5CDdI2S{N~=M(No#3 zczN2U&Bgj!W*CxtvcbCeY-xRY4SQTob0=!1SZt=;W&|g*3fiKxyyh@dZF@-F_E9o6CkBN zV~(%xdZHo~{qZW3+BiXhTn#FhRr9v^2Z)~IQE}ptp4doV_tdxmU?ie?$=-?V450E2 
zGTn1!zdpkOcxU9-&Zo0}%*`aOT#~FVkMj}2ldF&~jevQJ#DBNdIlnW`fa)|36N`#V zo>c-J*VLcNf}%Kag?mv;l=~N%VK`VBUD*1b*BHLY)FW{LAZ@~suL!nyyom7~A|ie2 z-2D@;^Hdi>)rbV74w`{KX52PHp1BrXXRI+IxxQgy*_9Wx{+xO)VhVV*z1fPX3CTll zk4N3Eog#bqOk1s{!=w@|74lj_fzAc;EV-kwF6{yb9uhifrFIJ2QpB z9+4p*iVEn`QknbcUvocoH6RLmr1f{3nrq|Z*!z0Ue_uYs0vlr*gBT}gS-K-|W(;ma zaW0R(;x#O{k_2rW*0k#h*SMht^RzA)eGYK}Is=77!l;8$P@6yrGN$0Ci-nNbOogZE z@EA-<4k}UVV;42}g~z$Ix?;GKz1{K4sVO*H^%bbqTGIS4v_8@I8t&hrq4HVdIdGGaZ)E~x_l1AoN1}%S|+;w zQlc7Opf;lOc=H>Hr&k=mzLnUdU5<24n@o^%iWh3n^phEL%N@XDD{Uh5202PqHEsub zDAybRG%>g+VD`Y;Ys^K|YTG?KbFoc8)S!OH7_QZ8C=o}sJ2Z-HS9?}P@R2d7UK$$l z*HTzLvoa?<2}uY#I&LKKD(hKay_;$3lT@o|R;B#j3<#=yP8M7j^$b$1O zL9Q=(<}fTOU<7v}LOMc`WR8R5!aP&?o0C1x5hTZC-`#N5Q#&H1kS+jBSPSHaOIKab z2~icc+{eZ`BdI4H849c@K+%pFhA0sK@y_)$$92G!i293Oq2op|Cz)<9elbr=Ths$n zKG6$Y;IRsNTn0U39|pLIg}u5-QZIx{f2lPUpIwzoT7%iTS0qyLV$PJske-q`Mk?62?0eslO&2;vaPSS>>II4e>1jpvoqd&FSiAZyG6++ zIQ%+il0m?{lqA^Ik>LN|xWZMEuYnshc=*qs{u}dg69U?_ns@cxAJCky{NR5|R;w%r zKIgx%oPRqo^+MCNmy*MOrgfYL!TBCnF2635L3_b`+an;Tc6p9of7sf8g>6tTA!h$e z;cFnpkstB_i`@ftl#kJpy7P2NqFV}$awkCc)-G@no$mAv^l=@^G$>)V{44xu2Fkg3m;{D<0Wx8c zY-;kWDR5^@se9YN?Wt(&u& z_ifz!axGw~4rgdQMq={q+x9H;(~UmBSpu`v)8j;|*ZA z4ZB-fC-)<$JrwQWr}7EXDjW$su*0wX$Ke0}!T%TO|EALXrY2{}omi9b4g{U($+XoC Ku9T_%74cuDm;Rps literal 0 HcmV?d00001 diff --git a/docs/source/assets/kernel/key.png b/docs/source/assets/kernel/key.png new file mode 100644 index 0000000000000000000000000000000000000000..2059b608caeaa7991113bd0ca05654e1a53d979d GIT binary patch literal 111314 zcmeFZbyQT}+Xo5=QqlrSgCIx^N;lFWA)P}BNO!}~AT3BKrGO05J#>eX(hOY^LwCbH z_|4xN_wT#beP^w44(FU*&))lq&*#~Ot0>9fV3A=VAtB+&$x5msA))ReA>EtAxDVV> zPemU`Lc%Jsl#o!7laQcMak4kJv@t_Mk_}HxMpswUA@bk#1~uZ-&<5KGMWR)jMHko;Sd7I~!z?u(_|8P{A+{Uoo7XB6oq>7UH|P|B^|$A@ zw^xS){XX_D+|74{Pcu02keGu&iMj1PNN;4F-Yg=E6i(K7kU)@H{4tnXa3>GZ;Hpa6 z+8APHs<#Gn?)Tolny(~_Jl?&%)p|`3QI3WrX3WHhRe|=HiCU(cB|tst9#T186*+rf zT+F^y!s8c}VhPkSjt8iC(T+=erX0&6af|v1W5_frXFfVFkRCo69q1vZ&vs;LVw#o@ zHo@#!7u@Un{{3NA)GC^JUhKnPWoKI*yG!)9Z01>XPct(M7s9Keuv@j#*c8`5H=?j* zKLzPJpBG`60!+`!TO8CmhlJ}>lO7iu$t@NBlq13JzV7CiqI$&rBhbd~iXQvCsWqZ@ zqeA>CzueZKTE~w?DW!6zX^=_0*~i5QeYqFx7V+oh-x>}onL>o6I@CG_6~d!FuIM`+ z70L}LMPg(eEHZDs@T|###L>4{Fqbr&Y(Iw27ismW_4UhzKTt1B7u9H%ZTlGUk+L99 zk?G(#^&njJOu^abVQtQaF_zzJFaNfTbn-7LsX;&6x2_Uo5Ad?v(;e5t^ z5b(HEeADUvP7C>201bY`I0gFZJ!Dzr$71(M0>2s&Ga*SCaZcj)h@nkl;|8`bFug@# zY}Ii<)<>RbwcSHH@`vo9JP0^7dKgF}BpI^){(cVnV~M2q!X2SPw4zL?SYb<_QJEgc zi#ff0R(YQ}FyhOLJi`1?NPv=<#TNw?QWrc#$o((8KN%kdJma`G9-wGMo`U1NV8ln5 zfzH^j<3N>$XCJJv$b5!BgOM3h(gItgIztnRLJE%JAn@(LGJc{@|1ex89{+JU#WP9y z_sluCmM;a~r^@0T5T3tlHC}&2(uK7ggV`ZzjGDsQ=hWx*>hUT~F2P7lZTLp#1LG!> zZ`!tn4C6Sx?Oi4x%CO7OCJDD7Hup}1PI#)RFR8-XpF8IAk?Pa1<0ePw1?t64M;CNg zE!8>r)))&>l?1>#ycZ<)eSC16keZ47F#PWO-J6fS6{8_K!Z<bn(Iv(sWY^^IEul}2k9`k{_x?fsEdL=kR`QEvI;c$!#pg=Zo!4`!-_(indATz_NzBR? 
z@<7h&xxWiuVhDTH`jf zdR{qGg{m8(D^s;&-9AM!wK%0vd1_@iMm0V(hWRrNu{FvMJycEpwpo;#U$1ytx$~LI zU{FoD5T>{rHRqf}7TGRukyt*ZBXKcx5pdyjEq|?a{ra9-5L3|D{p0(w59C5^+UeVE zLv2GPq`jm$qm#(S$Yyz;l2t^jMY%t6e-xXN@~I=GklXUbV-5}8UmsULj(-y2%Kqf} z(d?tf$JrO%A1nGt`rK0uVg77stg5U%Y{fclNdrkHy|YOMBV{87P=#DA#b#5fvL)?b zx?{S>x|~H4g<5*NI?d(J%fn%ClM&;?vGE@~vu>H&_XIWSY3j$^*xc;5*tXD4ex1N5 z@^Ei(StveHf(3O2A}CJz=3KJ``(1Q5M^}$J;hj3G`fEb#CM)p^<-NafQ(JLOODqgs zI_oZKiXDVEE{6KyY~9;=(}p{mJC-}#JNGzWS-dmfN*!7=UT8hdJ6hj=cAf2LZudIMbU^MG#ZtpT*>T)C*IAUqY)x-XQPVqnfBP@?(9C?p1jCQ> zrhfNs-`winenEozGawfQOtvhvum!k#2nZpUeByn$LRiU~C!9h^L7-1^Mc+XuNizI` zDC04g^JZUIMU;h{Id@2)3V40|r;Kr~v5s-`KvzF*s)5L*WsSkv@LmjtS4ZHxPw!6S zMB-BEs3kv2{m7omvK(^GF36TK@iC1s^)j(FN$v!Pk3O*VMoa@&<4(M_14*T?26firb3) z1?G_Ipt3^COd_msAj_;oCVL_~_em&aDN!~4K(&uJIouF>KUXs+CMP79QuZ>5>H|3Q z<#*Zde6+6=_l=YFvKtr3(5+v2ztp&$fvrKZO=3uI_t>8x1BN%E)1sX$G6Gg zY%{*5v8NezSm_a25z3w?*S%NsMbQU6is(#PXvjS_k`u{W5BZ%w zrWi)Pd)5>xC6_%?*Ho6QJ(3_FQ`ehY!MTFiaQ2I$pI!vp6rV5M^fUwgzDrWy8qD5~ ziV+;!Ul?js-hcQuwW|;CU?a@5<;)cok(hyN3?x)!G9)zM3K{r{Ad~-nEse~Cbno|l z6eOfDOC;1k-%$d7?>_H=?_HTce(%MHBB29+5dvS2&nSPrjk@>w-e1@ECV^*2FV!UE z?)Mu4!1dj25FO3$S6r-x>9iG9Xe8{N z%xHMnIoLVqM6hURXoQ^JmrrGty5y&cV6y~ZZ?t}eoKbaxH?{rRJx zW*(ORv}EV}=dgeQg6^Jxp0jg+{;nG+Ds*>OK*iF-%tlAj(iV^z(1r*YQ^U6-G@pj@lWx~TH8ag@259(w$xT+g8jWcz{T=TDZ&1xh&`RSRzyh9|ssbNwb zh_fSPZ0B}O`0_?*^*lJL`{zx`ph(fIdx^!a1tepBZtxIXGO-JTa{Ekp7g_99iF)IF zAl#+EMZxz+LZv}M#t=jLkIOvz067egL{`$M!2j{NznDA{a%{wZFZjE<04yT0mI8-G zBeMUgZr~OCy}Wz>Tgx2Cs0~=8xVbO?vqyjT6FFAk|C$Bl`hPi}xl6DNT$MS?Q6EA9ab=Q9FO1NmU_N%ViT&ri;P=G*JtopC*T*VB2w zW^8n8Q%O7dDla!(?l`lQG&s#KGl*ljh#TEr zpSpKbc^7(L?RQrg!R8_L-oIE3KT`hDV$WTR`T~>p|7h`%Dau-!b4;p;Z(aTQYO3?Y zBwed#ry(GpR^JT;L#O%k0rwflZpt-!eALsriN=BJBdDlrK}nMrU#D9X=8VU3^vsj)-z5B4f7riyjH^(J* zi5i+8jUojUZUDK|2%fKen1x(!ui*-Al@6dRd*`9jJM01j+lg0T|1msvt!V~Z%kUn* zfE0k;30qCW{7H!xAEDsqy#N#?b24uApNg_8rDZkMeuuqgAy5?sn}pTj1EVr~17!5I zOs(+AWcD5zV|Q4Kjz#*f>S??BYR% z@3q|9Ip_rq8jQ5MP8h6>8~R;`9uj#j6&POU3jz<-h#*JV)U|^?tDHoZN-~zBr!&rW z;S5Q#?e7{xQI_)+7DCDG8~{NS)ot0j}j8$RQA-fO5TdOGKAzi}0s|0MsAtgHB< zy7fMJrEjy!%Hi6e@-FX1Q3R}`t!iAiz-(3#=kPYe3-N7K30~7hW}S2D4G-`q#V89w zLbZ}8_NEl_?L=ce*l7=_dR<`>7liLpdlQ?mRjUB~IokOL2_}NeQVqq3Ar=jRKJ{ z;Q6wIRuObYPkr$-jO;4RVgJNA^HN`eLSQijQR!oHE1>Cnc0TAghrd(SO{D;6#>mi0 zJpPsUqa~y+WQcgg&@SY(o|PR`$q4ywBS9hMmZxOk+|R8unZM;hz&GUEm1J6K=y?eU zA~i3qu;6l6+Ou}ExW3#;t&z>?1iaK)b{_<;QEb|ew&$7=?Nt7?9ai=ff)yOZj*FG#A_`#!!Xu%Oqks`*#F}c-@O38d)lQr1r;jhAZ?~ zy3G*TH*v70jKjuiC(Eg$_9k$CNz(;_)nMK_*DM8JQ?GHo+a#23Gt}2y5>Jq@GSIgv z*?c7B3Wem`d^FEgUaKMR)6yq>aMYbF;`#CI zH};i(Za(j~$7*L{uPyQU#S$_hdI=vicRce%$G%R0zcB5JVznW5Q%@E>|Djyn7bJqE zykJGy{3>3%#=*eq7v8aP*twuE2MV%`1NbD zPdIg1rUafa*CEj^h0k6q|1HFIU{b*_mO)^=zp{usH!Ocf?gafe*{V zv-xgnKcDtYGb1TJ-?LI--FuAJRFPh;`668dC3R`bEXD?nKJHVN(9k;XLE+Ptz)cAy zSxy^<@_gR1ZSF_KmXZF+65o*lL*ob-8e$MlU(lb1Mliq}B?ElJ!1s_Jr?VVnMfh-3 zqDw%O8m#noNFif8I>*?E7B~DMg#9_WP%U6^%Qp|Sxaf!OzgKZ-K}M%ngDNS8rC8>{ zKc|J7-=D4;Q?Oxmlb= zkzxDtNae0$gNrF;iH&9h5l!1ClTn@V*q{au(#BA`O_*u#31Bq(&WAX(y$vqO>hk6T zgi8ceMlV(4HcknzC7p;B8x?(_u?SveaEWxDF3tZy^UOIsK~6j+vV)^Ma+U#XBv%ueO`slUkm- zeo{-Hy>dv7=^re{@UN!4s2DkPeYiJwa{{x_SI6r~^9n)3CA282IEE@UlmGfciaZ!$ zU6&kOXAaBf3`ng$X(D)p0Dt{biNMUq106um+8`MAoz09H9(1I;GquhVF16cuerN4z zqNoiNRJcG?6>%53dLSE9{Smqv&0uz*Oam%@Thc_|BYZ@YPHHPgBtrHcRaB5j{)gC>fMfY(tHGauk*Ovp zZ_tcS-UJ_vOlEljWjNz{IXrG-*eF$~ru=Lgd!uGKMgcT!?1Jz#@h*n%5mhw<4zJmH za^6TE{0dGwWf$@eQf6=NTw(2oHoNP)>)qc#0^UTg4=3DXuiq*!oJk!!JkuTWc@s#4 z7x`eZ<2(C;3Tn9>3Yc{VwXBKB-mMj@pV-3SZk-1a@AZVbO;Z)WY>9-2cnuHml#b;8 zI026`7ZUGPzE#*OIe(JM&muN;X;8tTf4BKn&`>_S32ufj{$p?$Fwf)`XHS9PVwC4L 
ze)UT7;A1-EtL$2s?$_=cvypTD*crM=)_AH+_c_mj)4txTi!sjG@?Q2-%H-{a)5cxb z8#HeB^GVZKSBTljQJPbDI~&A9cZFz- zawYibxI;wPV8ik(iCs%;T~3fW5m-B}V9nPg-Jav1+4Hr`-K#Wx%TIuLJ!~&=-tca1 zXDjO<{1`3Y{5`z6=cUs5fdp@4gW1}Zia1mV9V*ngp=gg4_Em4p;&S>x0+OCN&wja0 zjCRxrow>c)y{+S((63l8x8{uv31D1b(X}r6b!CEKiKv750FfK0vI9GHr`Pbt$xE>r zS#;27wFFpgW(zy@yKV(V)w-WZXVtD%Mp+Gw*Kq7Tf(|1UbHczazoW9c=KU^m!FeL~ z*;D7AUz@Y>b8RjG?`iTpmTTje1TKGv=RW6Q{sq+g>}jga($x{_0JhJD>oxdSga2AJ z5E!`l%{6!`S@w9oKuO|cI0`OboFv=v4I3B%z z4PmJGtX6Dpd592|$3j2{cJvP*EK4jP|D^n?pdgg5ats9-;b=kXHstB3S<*~LdfKCk z$Ff=ZE)wFtl>G(KpGNw5Kwv*_V9N-b&{qh@O+vA|cP0qTSNn<8VI2QT=PG zy6pgHsqXi#@yD|zk*jE9GgACaxbf@;;1~5@Gk*3Wek%exEjF_?E8mf9q^ptGpRR=WNM%l(sJ@^P4}^UmYO!2&Y^3kcp#$AK#bvy zR#s)z$Z4Je;LbF2;W<*&J%xJS+C17WD0DF>=;~?m$je$v_+UhAsOB+G#L-wB!nRc7 zT3E9jBJfb`9QTGH2jLE#hlXx-McLIXhH^z``8By4k;ERn$Rkc;u+mL6t4Bnm($4ZO zSqmi5Fle35NB1zW6H!t_&K50iD(UPSu*wkf+IbSx>Py~%ZJSM>y<;TJ#{yKSD>W!_ z2iUQ4tg)cqu;{6}-9q@(d0u{wfoiKae{{k5tH zTdDrSYgJL)s9s#Q5uXIT&!MtiOq(6Z6cyd(e`4I4VP9qM?K+wD-Uz=rO@0N@v0XW$ z#IIL~50?|#z94?m$Or4GD4zDobn1Eb)p)ex6+0vj&oVf}j!wF?!l+;t zAvHhXInGx9B0$0N{7d4ua3HMIqn)RThxlgk6ON4lqq+_ZS|lIZhuLk^Wwx`r`<`sUahCM1P@r>WxFc!Hvt+GLHH?xkOZMu~n5vXC%l zQdxJ7yf9YW$a@eOngDj6bq&{CiLx#0E>|#n>=k$JIZp<0C!C1kTY5K2vL6*<(+QbyC9F2EHHNqF#ml6RJI@hg#CSm^K-4Bt zX)97yQjUl6k&=w!DqL#4@v*iwhVNAw-5H+V7;VT%spjUWH`k61SzBH)$?-y@AFnP! zLtYGXM%H`~q(o|jMZ117><`n)YO=Vy!uQt#lg{+F_uyBoDMrKLay)nHWn0tKF-tre zY|g3cB+)BmK&^ipN&@8I==XAzg#kY5;7$Zfu`HwcMDTgLlWs!GDXueMQlW=*sd}zs z8tG574I$^h(Cx%UCr_<%NZeK61-PtDXGDkGV4gke=toH*-jpQhD=exl)j$joRNYf9OFZl2#0B#dx zGx^!$88COA7_DXx#&^@RtMW5ui z?dB#6o%8diBhn!5iy1^awcsDI2 z0f?^jTQgw#T5Rh!auu%~2vG3XGyFq+?Ew`x0D8A{v*vdLy?lp(`3F>*#}90`Poy-f zfA`D}g>VmibC4|9?>E+$4nQ(HqBqN;pF9@<&gbV7LIdXRxxb_#H{~55!c6S^7%9A(~Q#wXps~?8QlWvzu6-J8h`%@WY&he z&HpzS^v|WyB!KOPDyAL%?FRf21^%rbkbwc<8Z8q4Q{+D?YJckQ&ug=k^Di*^-z-xA zE(XnlD-w3e|I{{6(IXz9dSfC+;lF17zjVVz3>f-7Z4Usa`Y%s@cd(@tKm%}6`8NI+ z((Cgbh)UVg#_~UwMnVk*Bx^s}A&2+BjE)jttYs641WfzyG5rI(bJzgYw?9?#`d>!3 z?vI25MlapN`lrahD+(|MVkUAP$KgLC@4saJ*E?e9z}yJX4}XSS z|1mFjJ^H_?|G%mKZ;RshHv0ehu2-UI1r&y<#6pJmj@*@E?wWP_xev1A179_5u?6<-IQ=4`3qNCtHBISyuGF+-*AuyGOB=?tdZX zoAU0$XrypT1O+@MkLpdsL4(3O=aD__xt2aM*-I6APyW-}f9XanjGrPb{%y~G>nkRD zz~_D!@&W&mFbaSaQO`(|+a{Rzxua1vK8Szvw@OptM5!FK(lkoJ29%@`3ZTywX?)^C zTR1R(icvhZ9lirt_CP2Q0xDkT)xM*SsK=Z)t^t6(FxtZWDMEh94CE<^83kgDmidPI z^S(?Bc|!}RWiI@%KW!Q-eam(7CqShp5G>rNNQWNYX~z2spJPr9Q5#yvF-^k6%rHjx z?`u5vc!UCU;WIkQjGwo^j0#2+AMy^~6L8BLz&hly1_;dg5$rMRrE@$$xn@x5CRg{v z#3u*rN>v~DA;5u? 
zz+(SzHcnd1WcGPC#c~rs@Iu=R+!iwh(WyK>-{BqWNno#yGgYl_JfS`RX}4+H*iS`|L09euNfRT1i{@JzTyP-!;McANv6B zT0fg#P&^gq2@prl^H;09zcH;)hhEl-D8ZeY~&tM*KbL2LB&as2496 zLP)_5w`bj=!+BoU@g~jkXwye#()*flILN3Ga;~cHcQKNAhle0|W*pj&6jT!r=L&4A z`;LiK4WDdo9hvQe{LUl2Pv91_sdGoU0v2e~fA5x-?*twI4>$uj<{PVhA4j*9p?RNA9(K9bbp)<;mgAd{*gvX?B$e2I5@Z;EbAHiwsZmrDmsJTQ~4Q$avMuFeU434kM+UBpxAtzgi?aNGUJ395-xsuw|Ph7s- zsOr@Bx0?V-@~)hI$h=zl{9)Vr>ZaHBAYZF7pO%`r-VLpxhL$MVo3Cj-EAqlhFz&a* zDt3(NG9pp{DK5oFAyR0;o9+v)48jxP+s}UbBIrssr4om9$`DcbQ<-#XI-*$*8C2cv z7MNG=Pr6l2T_Q-lclCaG%lB7d#Q@gpTs=`%feH3ofQd_~7r{jb;un>k6#|ru77Z}j`6Ya_-*SQ@A%^6Y{wOCPYUKf)a@HlQ%#TvBC7OpGL+gWjAg(O!ht=g_*JAY^5qJG!1868{VA zKt%G}nYNPsJbj{rQxWk)f_?$oEZ1gO@g|7cwl@tGBb)QUz*PpE#>`kMND+K~s6 z8j4y4WQYjx1BoT^+V6)kXvmc|-X;V}yM1pSp754pIqe`)PJonZQhX7H@_33G#Q8HW z%AE^eZdHtI0}$lc>-a_t4d!TGTChEjiDu1D+@37JGkg$DC1GFELMx`i4#3{q#PM5~nuatI{5R_| zq6qIRGXjJ$^>9(DKXh@n)lF1cp6;^UkryR{jJ{_7$sPcz3)k+oVUINL_X_ieBB6dF zSg-OOXSFMirbfoJ))d0DB~iR*l8#ynaIj3)Bi}Pcn5HaEAizd@h^zO&vFUPlSE;yW z35(pe8q}Ki7Bz6Hk<8IVbcb1_>Km2s(a_Pdo6k0oDq$7m7xMb*LTv8ap))76-BLnC zV~rcFA||Bz~yqr6FyQYp0#c-h`K_dw5-hw65Fz&RL2tvf0qy9!QC!+;96wB zxc^!rQbLF^-9@$jCHD1LneTWe@4h%i8j#eIFL@{3n%7n=P>{uq2y;ru@*vbSrR7r6>xQN|$r}&f(InGY zC45|39$O~JgAn}KZKev^j&I;dgyovnFWX1NZa|(3s=a~jrpK*R%VvHaCThS1!TRxi zGyCfKi*Xp1J;IAgLEp05^~r+VJAOdzliAIcD;m~tIa=ZCM%cVxUr&w)RT? z1Tsq&Ll?^Yq3EY~aC~T2I8_p%XW_{EpcZAr>4>y+i=sT*H|^^$OLlIo#PXgG_l1WI zKiGgxC9%C2!oH+K-c2!$dB)nERpQZ;X;=l@>os8FHHbAB+OzOAi+=nrJ9xoOzXT=2 zOXNbMBL0qA2=|h-;%hG@A+}`nJA3@#PTE0b5!!&bnehQfEkyMZCHZLTvy3gnz%d{n zC=S_2^s# zJO!3Wc47PWN<@G~7&?0W*rlQ+X<4NA^L#bwK7~a$AhGTKw8T}}ewn{TSYs$h4{Yu= z05A4^OE%i0wpSKD9xXSr%`92ICPgmUUg~^;ictJKL9XsIV&irdZSRxKP^g3^g!$U- z%tYtHON{vV{^&$S%@o!KJ8fHri_u66h3a@QVYcIo!6C>Uza ze&aN0gO*z^Em;bEOFq~;!~zI@5G!ds6c9U^fbn(0wzWr)tFCQfpZ$>MF~a(n7x7-i z@R-AcYZ1q6h0hb0yX*0^7J$J$MMt#IU`(E;at}*7m%@48c}f0&S39z<$mZ1DTx?>m zrGEHHx)kYs+Ib$QLUa{cB=S8BF9osv)g4_h;RVV2qoI)qbHawgM_;+PqTWP9t> zI|Bf>0{vc<6WQ^TGC8^w&3Cq|OVuzNHIGgDa=4KEL)+CU;=NIGSk|{kof2l;c;Kz> zT(*}y2_I}?MV$&}MxU^s4e_{Xwp3gm6!1-NxaRROa80{xK}1Pz3zn~8ac-|NO#xS* z9^=R>sGPU26i~gZ&XLVW8(u5~`9#Rd67t8LY&iu*R6+vKTFN7u(g`lcAm8r=NUj4H zn2S{c3BuGTW0Pg|io8jl?H7pn2WdlSSIDqp%^S2!}-~5Kwg>wpQuT}IfiNd z((>=XXGwJh$em=O$G2-R_C8w-TMI2fV23yAYz9o(X!I+gHh75OVVYdDJxm)uzgiQ` z^u~vS4C-pp>#N3oB9ntJy=rttH8E5Lfax`4k$41G=b@Gs1WX|wEjvzu&FT@i3qY~S zclIi2e%LlAhy=q~sH>mTE)MWiCs<%cEwU8$!^+jY-UO>7Qq|S z&f~*H{iP9Hy`)iEK#n|kLNTUgHHJRM>+4$`t;?O;i%^lxv=zP!qqLQWwQ+ujAatKk zjNi`ZC4ZDma-gUttHnPtC6L~Z0|EJ~@Z*J&x6|SNT2GleznV|G^`nYTLtCO1=vgJ(r-R&YaYIs7)3yeh0*y5S)r0tZUe6rc#EqgbCR0l^)tH@h zjYqHu>z*am0{JGFKdpGyQYl|pu9Z!jB^#87y{^yz7yTr}mIp-1nlN8u?U`o5vao@z z8do8_r;F(!wK&Z`5$!+0;te-L6Y5pE{PdypRb_dk_xF+>r9+A?ziOXfgFOC){?Kw} zGO+F8+jf%Y+tMv+*(wqmqn{(wn>=)%BGTWH3!}K&(8PeBz5gAnFH3xetAna%2~MEO znHfROK%&&@6l1I5P&Kgggke*T+gKiMcFW~FZm+$pyhEwp^o-mE3v~7d=@lp$BCNd5 zZt98^RKe9nR-%&Kj(*$LhP^$C+c)L|O{c*j^rM3831uQ_?wdWRLRFp1ua(ks3GfPh zm%uO;1$krov?pbP=KVO$X z@xz)(68SOTd!3Q`Zm01#ljAF~*NazQV?2YD5IKU?@cb1HTs`n`%Ww&ROSlW%aot2Jy z$jWXT*ID1gE4I&661wq=4Uh+mPnhk@mNbQ~Jb*+OOVv+a-BWMY7#>u2=n3XO(+5Pj z&G5LVHzongSOBgY0GkD;OYKkGK^|J_@o7K|LxvGdUR*8Jm?DnELfCou4NY|FPA*@J zM(lW4wc1&N3~|OD49uzIClth~{ASlwxS{2t3KFEvgkEtIHA-asW)uNL0suiYQ7iLw z-x>yQgCP5nfG%IW%8nKJBn9OUZuhiZ8_di!7>OyK#v23#^Cn1=fz8EnT*u<;$b0R5 z#R-NNy3h;_LdxE$QN*@+NdAjBwvKPHQ(F3tQiRH7M8(s#tP7TNzqHdv>JzU03r_vb z0f{M_ne{3;hjgPPhc--|9c3K#aSp!uF@4^894ClNEo3!dkLkLU_nLDYn9#-xb|49Y z!XB=D#7;MmpX~*UuOSgs{E+Pi`EFWJM;b;QVv5@Enn~&aDS#FqgIC@*m95;!{-O>! 
zxz-I$CaZ5WPt;*F`YeWOfY zvV>&&tbT1ZSbq{M{%)D@KDeK6kak|zUy-_)SmeZ%H1ek`MKl#wnn^RQNczO(cfbwF zj3oV%;-I8Tfe&AtkAZVl)M(+ zrbkm$If&&cX?&HLiyr8GbtWR)PMVvGZ=vsxv&Uk#NtcY85rLXZV=7qOUN*BS zZ-IHz;eK`0IXOjaGm+EE6Z<@9A(ri*DGT;rm9Mak0_aCxoZ^#W;btI7n^%$?gllol zJ8f8M^zw!K&W!$5|JCNFDQH=Rc=T-Sa=cM4&u3SwOKxn_<@ZSA`C92B4bvP`S|XHEf?W{gm0i_235kz8neC`$f9;+{a)mq>)UhS|-;?bN@5FjU3L>H{cyB7yle^>DYQZwR!U8&Eof5Epom*OG=WUtqp3~G9dSN*-1mP>$ z7P;Ia>QjR4)7R&#StV?+`~uQ#C-;c;qE-IbUs0q1cGDKiQsFah3N6)s+Zbq<#|Fv@zTs%sT&8G;1|teH1IgTtNA?Iwy3*O4j9t&sKY9S5_@&eQcU@Pna9;GA)r)%Hh72?wB$jNYL^bo*1HjHa z8=&|sC#iu%q5=2EyG&Fe^FD|D6&zq2NAQBf&8DJT@!M`}4$G-KT7Bn$`u0p_EevC2 zfLm}6Te$|G-6 z)}A|Q!h1x6b=$D{xeVqRL))scG*X5N>n6sGyAwX;%jJ~w-u?h}Iyi{u6agVu2w-nWOXD(6*E-qPnyo?=*kDbx z2q4Q%kJoZIN4z;d6T*(W=H1Lck8iWLOpY@E;(4EOD*ojVC;Yys8DqyC5Tpp|J#lYX zrW_a~R$CPo+$HxIIJI)UJI3P{L-`w$e&mZXTRAA5iN#-tg*@N>!!w}kymnFe~vdi z;FqAx&-{KV<%L;f=w zG>zeT3VH;xW=VYYcy{&P0A+Pu2?Ij&GF`cpSs zTq#c9d@%GyQ{Sy=sd-y4=YbGr9B`_Jn(USEDG6-k9M!FQgg3v+->MAMa<~w9oUHY( z*Uk*)RnVmrnVXkvnx;&yc^F=Kh{hL_3Iu)thmj|Gw`Y$B`T70D_>reiRLLI#?iee* z7@p8ANk~u&AO89&DcF~IpD0G)c-R{_jP?m4rwbh1Q1H$<{!m7GnwD8Yw=GuPv=Ft&^|PRgN#+=7F~YDItulAL z{7j9C_zghG{2)R{wGz84N@mn4c~_JT)8W=O90L#kksc7~c2%$c@IXdp69Dp>)@s)> z{Y2a~uphmB-lWJo%jKSN593Ju4^j>+pH_;WM}X`qlPTzKXZ75$*h<|Y1<0Wo%@Cs` ze^(Wzbe~3r=#Dj1RY?KzY@#Rveg{#~#boiczt_nfNhbL@2B(90zn$c-J2XG-fruuP zcHy|51fTkEgUx|jnLpR}k#mHT)4Y#EYw};2CMg(y6i}2@q*eU8n!l*IKbW^lIUqqr zQN`nbN#SUb1TsicF%UU_<^%n9mjA_q1@HibXB8%v{l8d{e_sM!!0;e?na)KEoK8j5 z^bDXHk!{kGvT85bHS9~n=Dap*fpjvPLY?=Xe{`^g1K?oE@0$(ZwZuV(N(fjx4ieO2 z;4ECneD!d1*>{eQTkl#3WGLcTEBanoUjOEG)Rg$YX770bAoLx3s2=j052^ALpJucH z!acCnYQi%$tad=1YkoD(RZP`^M)z1BLg;OCq%YU#aokX#Y(%zApf>R|g2A zKtT>?egGhq@)o~}(akvz3iJAy$`YVFkp-A%3=(FYd@f}0Pf`**iUlLTyis&SyW|*p zs$JsA=A?)zM$zro?|p9cu8S|h$=$Qzj`R9tT@1%R`#8aBrSRAP1z?fk2;TG23Od4-pcyx96ofMaoT4;z`O#`jOTyREdLx#H*ItB| z(8E7NDE8Oq*d;;A0?=>3@7N(a0;Fmkz*;ty7A0Guc793cAG^}vP ziSF>sx~QEd!5TUhKXF;oSQ0!#?TQAH5gb_@4+P^-&l(l!eTVNRSA`nI)dx(oFb0Jf zKSTARTVRxO{$AJGsie|hRA$Qm!`64lQ~Cb?b2@Z{9E2i!zU|81krI&>*<|l6dt^p5kxeCJ zXYXB*%o?cC?SuXA75^?E(mg@YgkDmZK}0%Y_sD;^7k zSD6rAy7Pix`IygwN$x2$Ztjf5b>Qj$<)J&JaKFu3BvD8)c5?ZNRX^K|HLXXqZt1Aw zPYg=I2+Asd#{bU;kU5uZigNbwTNG`$a{OORFmk7>8L%t(9k0*)hx0~WU})hc@fkR)!?b_dY;0~?T~r={|#<!_ATbG(j}v^f9c9k_ z8lS6n`*K~70&4tDQbeN9p<6V|b?Ol?QA~rnvMa|hsw!X0bG2#aPNbOHc$17x_q*$o zMI{si2+RDfJz*hA$3^2exP$t$!NRAaZNCZnKJ}*#^@^B{8?Y3a23*0Cih$*E41^M+ zGf$;|sX-f^HuDsGz0dhh|NYJ2*{=E>HY3F9Z4N<{74y-bw7wNS31VVi8_9JqL9;! 
ze>~hz$o&wp=r6?H(8Em+gkFYfZ||KrDukg&>QSkAkFPu>PXAvry- z`XXZ#exQ@D%FAB|AfgmBI1ee<{TTQ^z-t;|(ei>1d6#D{Pd>TSZb!eHyLOr5S?NAITq3 ziB{rUgN8kc;fN2|>2D z-I~ZS+#|xZ?X_gx@voI80NIQD)V;V`p%JSYkToTo%Sa)hmO{FUHMs3>ZgyzD%Ved{ zz>XmsB#S`X379|M6Kav#=QmjR#ljV2#{n7n9;NNjvat;)!Ob+)=L{?*9a4Lxw!l<^ zMdwfl9NR-H?KXIogApK05}}FJG6qEaAm?G#R~`2ssXq+Z0aBV!j=9K6BjoX~?VkC{ zLZeFNed7UkqHW}y}Cj6Ev_)4#z~C3Hr~cVc8a>VU!V@Tt!}b6B<6eZ$hh2RBOcB8DJcIRlI0cUy{0Amkn(z$P{kn8Yip=* zK88}jwM9vHS{G6dz`(1;>uNq|(fmFj$2dxZt;j1iQtivm4&(wz|89R+eMu6ig}V>o zPQT^+c2H%Rlw55j&!CO1Q5h{f+i+Wnr_bdgP}s3?H<@DJza%}$GrjsTx+-g_@ZRBc zj);JTynXYbNf-jk$s1b|cX_&{=w^8GmVbYf)sFU=*#0DpXuYE8=)l^lvi>VPEOTzX zo=OaUud^pmh<$`yI`{IfF#Zs07_!83%GKufrU|EQptr5{06 zT&I{Tf`dJF2-xGF6*QCB-`MYLjF;wfVMYAB7kv83p?w*V-gJOeYXO>-BzxSzXyp)v(+owYK=l>-)CJI4{(i5_m3$bTKaJ_9$T$4~E*ayu}dt*3*gBe$T zyVzsiE%(7_eQf6@Y2 z97ZON4u4w*s(X?N8B!6tBBrH?l%lt-K_V)&NsjHJWv;y78DO%tPRblPD6!f4z;IS} z!BYHxHbtUP8J=J>qEq;*vih}9HxQp)c`cti^&)Azkv)D{GI~y1?k*7Q3?ut)zgF6$wcYhFbp|~cN-8wwFMP&rJD;Jf;D)Fp0oU%qg)*5?{JQ4PDD(G)s2KAG4!b^Q_tE)!Ldu@9@!Qv zn5pq>aTxJJn>A+!?Yehyi0c)S|E_&$eabc=X@n>G8nHRhv`c>T^OQF{8u45noFR$q zTY0bFm3gDqjc~;;f4` zI9u^ZuW8kw%udj>>Af#ysYl;Z*5bli@RpmXJW^J;X27aj12se9C`-Oz789C`HMGFFVH5?g&R)#tCs@V-H zZV7CDJ}CJov0D2|?cXBze|LkR`J4>qHhB|Fuc=w;>q}{ZsaN2xR}k6hj;?;CLvk2e zoPcpPeso(KU#-@XZQDHGU;pLWo!LUOt1INi_D@US*o`Fu)6@9gL}l@jvSw{=YZ2F^ z*WSoP`}7F(NnhV6bu2?Q5;JkBU4&WkH=>0TANaBb9Sg7MbO`e)k9yEDAO%nCuo0gW z)FH%SW(op}S8xhfDVCTgDQ^8(7DZPcuCWOHS78l>_gO|16m#lqjcC90Ar}xsmyOc9 ze^0RvUcOR$rz6`%y8@M%B+OT^{{8LKcKPh7cVzii)vSs|)Tp?ycVQ>ECe1?}{!r)D zCXyH!>OzKV&ACCe!1@Ia_bK8(%X}@$w_4Wn^lL-~Xtb{I>F--~mIq}J zVu&@0wO*1ija6#j?US&e4$3U+WvSi4W9WA4dX#jT(3R|kfVPlJYz^;Va&|&P2Vdhe zE7+{d7D7Ms`I27i`EAHG*}_|sr(i%m&3&0`HihFUqdq*Y&eFXd9?ObP4NvD=V_veQ zy5VOlFJVl`6b()BBu11j&K7BqDD)MS{<1i%`&ij%{?H-oV(GD*OQn9Q#0!eTrGk2P zw1u)SA5#%;_45mdkc-oyyiqZr>k)oqyczSiO?ay`S&(#K#daERYL=>ITi#~8)b0=% z_%@mJU`>%+>&lS893&g1b)QJH_9Zb{WMO9$;(Iky5gqZk0puAz`*W4)$Toj>M|0P7 zXHX#B=Nv8U;Zmh{%q^V`ZOrwye9JNV6t5B_32!d@zT%-CK5z9(zYlr2GO}djhgIT) zvJL1Y+I&oB?$}-O&eoxZc9y^8P1{y87&}#IPdK2xM{e zP~2aNx9)#BeY%5av-w3Wn*j%a%R?I@_TER^YVL8L`N1#2N3)S zvKGme7t<7NBR#us`Ky%acc&alYW@3Z67L4J`NHl+C`Vp!crJeCrA%q!5zrPGJhyi* zekxY&<(SXWf#WyN-xU+q3VIJxg2*9ork{8qLi%u9+qD9FHBWEK1x&zBn;JuWd-a0W z&dMr|(@jFQgkJZ$&BnIq)mM-GT0h>I3chAh?(iHeNZ!v z`Obpm#IyaT*@>cI-urnBF@TU=GeDwif;O^!O@Gg9^pW$^QEGwW&7i?P)9vTCwzz?3 z^mCV7pqH`muX%R0z?vf(n-oxOau}8)31u3iYk7Fn$Qyn!2>sfds}J-C>70v^zk5SQ z^n+Vsz(Zi?-zr@1;#s*H_qgOv2tL>@0?^!YOPXhqxPrb@??&~70+wAkQ zPeCwQy|L%fS7QKzxW%iprY4ALt-D)9c8~(~f$Nn{7fZZuuLU*u;3KURrQZo`1IW;LfrXaT?EF@`s(> zFWp>X6NLDbS$y_k^Hsq}nF zM!InRe-uZCFM4b(1|_;MdJcMjyOh)tnbbHqxWzDO{@SLinX=C>Pb~XeIiY}}@yEP2kf?bsaJtj)Gi zl$>2>=%j(}qiU5|04>H?yPQp~xlJpCE6v^O=GV3(`uUh$05@%B+2(-W1W(mQ_X$B` zXc`=VW9V6_`ih^Zo7vio&li$vUE3x+F^W(**^g#(JpN@A_P?K=m&T+almg4VE%qc* zRmK)4(RVyv5L?KasdOE5xs4>btyf`t#FAw~4Zpnl2We~-7p{zuPUQoffaP16@RM z|3)d^U*_}QYhclp5t*#Ge|Lvt_{NWuEPaMhgF^JdodbeG=1CoB6%izFvx8eS9Sy&Z zKOg?Tpw8d=4B653F=$u6^C+Vi>BN50KzjYETyJ-TfG*GWZ%tHs5;~(cJ<7su3G+Cp zDiAA<5LRu~GfftKCeZskBF7h#X(BKyorK!!4%ZKjyQ}r}5^&{rV2C8H79st=xYfU~ zaBVXgxB@-9m?nu6FZ?5uOolEz)d&6e9W9gA*S84b_z_qO!MXkeBmcbV`@5|!n(<%! 
zit{pTIguRrPyYL_kPkQ^5Rst~+E+#raOb{)(L8E|RRJ{agU`7aC;pZNSHu1|N8a8$ z*+Ps~gg=dZGlc+Sety6th^tm!IsKn67h_XBR_AjTafx?ddc9qNNgMdOVA|L7+uaYrJ8g%N#(Eru`*?_b zJIq@d>a{)XCp|JwU?;8Bj0G!J4clKUR`kIyg263~-}yws?`Uis-XTsLeJ6s>XIl4* z?R@bAE#y}@XQogtUTUGsnWB3kQ#K#EYuUe zoCU2{7UWz=dmTHNH^TnF(whGZmZs#s-)UzaNTFu8kXz#oAav=k@`cg;opqWV&n$n#PiP6kj-%OD3n(vL9FPzQ*2N-rvr^uo}U)Lc454tBHWEAFr(39HVsNQ>pq&+ zJVfc4r#QRPk5-p!{^bG zVNlHf#|ix@9;c(^LM{%H=I1LDhyKYHB6+wyWAkgY%>36Eu}g(9n<;K`wKUWgWIz~Q zjgc9nyZ=_3614Ql`>=b>cV6r0J{dXkd&73O{$M3F%vad;(>r)IpWx!s-8WQiHUdX= zIty+M<|!b2CKe+AV{)1sCWKG#sLgjbxBw2^j{dpjIpcO=dl(E~|KAV8wC17QJ{`rE zkD$sN%&H}OwquJtGrDu@klMpbYvnIPPta@s#D~5>dL`XepZNR*0_1h$0IygHR%t?| zzQmT}F$Q_YYsl(i+};}zTEf+Van&(k#-e{-OSU2LEhi1K9aALoK)hav11yh~V4`k~ z748rRWeo6H)bn!_@O3j)9t=td%vX>sZ%qeXi;$pV6nYLK_x`*d~oQ`ZzE5i+sn^2`onM| zV8ZQ3-8C1QZL$koA+(eo!9ohwZkJg@k)|{E12UJC5z`GPm3sO~q@_`}Kh4~!UcDA+ z6k{CKG6b@mhYuq+Af@8P07QzGl{V@F%BWBtUT{b5I8;X1Mx3}m@(6H-ORFe#-Wmk) zDi5_}I;(Bt}Rv%l~e(3oqTIA)!X7}$jJWGgz-)iDTo6rb1yS@|GBtkBjFJIa; z3G*ntcdzyiqYu>F$`D~&rTsRK22^{CRt>j_KRorEWiE3B1;qSG&D>s(i-Y`6=zM@a z_TJi*dZ#6f%a@bMNIwj_Wnnp)=<$n2^t=}K%L1WCUj61iswiFQkr>4BG?MjE(Az~_ zqjmvq7yuLwo(g-46!?I=U{1!!Z%RzrRPUI~&*wVA?ilL<%G(79?9C`{8<-}pR@ePG zLFj7JCOBXvo<&s_Pt`TrKq9@UrfJTKdn;H{f1Yi5*&|})Q-DV~ zzM1o|3~`Go%bDjw#^{e~TUTR3&+h@0J0S^4*FNnedj9wU8BPtf&b_xp`GDsrtC)O5 z*zM@u{Ql*Ux488AK{wnE9bmf5dt(Rhk7jXw9Mc^*Lb?{q)|iQ|wf1U=xddrnLi9 ztMeFVX4VUxf**kg*XZ0>l&@oT@VS;-XnRlZX-FgnsJ4aX-y=(S6{$Sa%aR+)dsuNig;6L5tgb2BEP_Z zYB=)kVNZYl^M^g{i$&S9X&ep~H0E`urQZo@IW-cx{sMUHZxKi3Q^+WvfMcLSmmkp!i~8q`!^(M;1

$$o_U(0>J&3c!ZRVb=?kkbNwcd5Mb8AQslJO&@x_C(26|!X~elDRhJCz0czxi$VSWC__+um=MP>p+H0@_hqn2`6wfNqQYIw0)&K=yF#JWv7Q8pqz zfYF~}JJ_2zs?J^sz4k^G@pXmTFfQeVTj#lC;+_%Dwxh1+OfXT=S@hSl?&G)V#}1tL z?)S+&c(WmxRbsX|D#F_~rEa7&%F*|s!G6$g9}K!e(O$&ryjoQv0bOeK=G=6d=d+X4 z_Lj1Oo`8{@?lcq1zx3$p^(UK*o}bINPe!?U)&AolHE3F#?UV15t;S15Xr5nV$34a4 zqPZieT{2=~Z*hzBu~EN|_#WB0BuJ+j3>dY~Md?cT*cD`m<9D?$-mP6I%I>igFNiL_ zoin*#$AFg5I-1$|;HoX`k=dLVsf=;W#@HVhIW%be)ml!=`x#y7d>(2~?ljzb)CU{z zK(8jv`@Zw8RFEblH*;4nKhot^!#%ys>>jGVQrs*%ZxMO_rIabl21{wmmHoUY_;Ign zH)VUN?k@mXMJdX|LAu|ZW1_;_Tc@^=^;0YC8*A9>vyb$q<@D{{# zygn)M)8%jJ2(!r7p~kJi0wDc zhiq4Be~M&&za=+Glkrj`FI%!mVf}ZY*8B6!BEPAJCxCFefLb~?TW7driF5~iVt14p zzPqV!wf35Yn&hbC^8Hf&9()va{MNCfbaSl}ybCW?SaVTamakPJOQez)d|NXgI$^OJ zho29d6a_;qv2$J^Kc?EgL6NxLwfWtZb6hXX6)Z0efEy)uxcCBB7b_hnR~P%1Vb5to z()1~=5y8f$axgPr+yQU>Ys*(73&SPECKdG4R{aZ|Le(>_&Q>0M*Yhmq8g*q;&^NMw zjpP)B|1i<3xAs(I9^`XDGT*D>hizseC+^o4hIhY7^gK2idiR9!Akj_>5T zlxRX}(;MwiDe>}E=ExQY)cEu`mnYjrW}Ym%bFJPrzyk7LERV*wKjKj|)$XW!sIBBJ zvQuo0r+kmml#2J1oQ0r~kc2uC>mF~vo9qGs7wnp(f-c~VZ?Dgo*S?hY)QB-WuaAFX z#r|%Op(ku;O1)BqyopR#rc@X}0^+{=Qu3&W*d;SJg1oP6y6e!TO|DOiik!^;hP zVEF~VlJ1CD@h_*5_Qm4tUYwpJnOd$l^<~}=$g?1PYuaB%o}d$l-l~r1k?m-n7(nTS zm!_y5Gz$_5=H7_0RIe>SJ)xE-s|by!E-1eBVQ1}8fgC@O-ivl(C7Uk=P2~jp$hlqe z&8WIxOujcrY|~!Jv_|YXpqmSwHk*&97>0lCH;#zD&tEy$-CHSB96b)S6%}u))(A%t zcPMq4Q1TKZdkyvJ^GCFftrtXTHoFZIvZbjno*B4a)rpRKb(Jd^5c5t~J^IFjhn0`N zIg(6BkVNb%$vA!qn)j1>@neWCqM@H}GBWPAEIprLnjM)_uMLJ?sHmu;MXh7^er;G% zrhTkfF?e$67KB$RIj1t6}0l8SfOUJa(Dnk106am~H`t9+VTXy#>0L_r)VA zTt78f(+r3iJ-J450RWl}VUAE>!T*$Vqn1na71QZVCx2oKj!*6Fe(EfApAIV|e?gY3 zOOgA~YiI(au2q%ZSMTSzHxwnhH;o-ED(EA8;jC7>&h=>+_NssptZn9Q3J%7V7$a(c zS@FEYKNu*}h8cWn?oM|`vZ9wk&c zyB1R*n@Tyy;kYHrjqC6X*`ndW;T<`!Fs1Ah?u#+8PW^cCaxEA0ow0RiD{07}E|D@g zhu&301dr9IbOWPegH*<6ZSfS31yXF0SEL8~yz_g}dh3}cOPX8*MuI&&Ny>+z(;VXJ zY?B$!m!m)M5k@7JNVbIT4}U5QiX-|m^wFFxCuoBYDB34g^+Jva-SJIWH{?y$tq%^8 z&?20Adymgg^;d$8bpXaS3V+WZT4?LefXDPGe^z~u+ePkM={ov45=(LgD?l_sT1tM) zt6w8Rs+Vw=5qd2#m)b<@8=GS7g1UHt4sPy1u}aNw%vfRMmoKMrbD!LB?ik2F0O)>f zH^3PC9fVoq$-AeLgPwzdhWK`N@4Z<8+=3Tk-iFO@s7M zD{^1{_Zp!8GEC_cQWQLykdUit@ZgB?s5s(#sv@CL;V!bdQ$0%4f~?`wr}1yd z{FbQ_`GGjGdY{TvzLs~*p|EDf;-yVgZ@wjY=$>fVuDiLRwk)MAM~ZMJcf{l!XBDkJ zTEB5UUGt3InjH}1r2{il@JDS9_y2&w7%6_iQ;?Tys+w*YJCREtF|Q@f5%u}j=ilH8 zT3eevg}xe!;9AW0aCD5=C|3$mX58)O7(X>!Vq$y(Uii&TW3y#5A1BMj4_o|-a;1|X zb}L%F-rQ4ZRcDS@j19h$CE~e~*2K@!n_-{H_Ytuq5l<@dmazyma_O+z4F_I6#MSn+@ERU?zQBjRGA5sFZ8|I6Dq2G0sGuG_Fxs@G zaek45{7YjxJt}e7bT^q=dSd3{*(=DabhCH^2J)JCPt;9z>(tk&5hocZDN-BC4oUNt zPaT9g=Urg@(*`DAL~pF+L^5v%XA0pyzHVVRoLiLAmFB2vuBMkM_*?s5I;Lo{bxcEyifDBW>b$Q`K(we70tT$d8V`n=|E~wwxZTZ;C;phditB{Ns$L1`Luh~ z24)4=btT?iMI~yFPTm!nlatkuxY8%|YxZVlOH*-X%2-5?gj=23(0ERm`t@USY2Pv@ zLF6eI;7NMSiez;ZxU5(wzx2OxDA9B88)5kN;c7-aT6VtW-u>T2DRMD<;)~c}gvmJ{JUqXvK%HorPrFq)jO%h(Q(I@V6rb;2JVKILrw6n9 z=Ty2&RMCZYk5V=*0SlR>fJvmBjnEyAsVa_qRsA#VO7pYNA1fDjVefj&z`Xo(tNO98 z@Y=l>L`YuXSqZ<=hxbPrIK=x0EEZTcB6oPi>4k}Ig1o1AdJwT6bq`hR7^QG4d+1ip z_Hwl)67InoX|aMQZBeS37L5{&uitdd42ni)s^xs2lhtb(u`jmArqR=nqzM1~q0RyZ zL>BE~hcGU^UkK|`n8od8OY`051rQ|{Ji^xM_u5IruX6p&xCihjSVhe zP=7LGRFzR=uJ!A(4`SknPi)DlcS%^gls~~%CuAp1883x@TyY=&gct>jGlSI@1wxa# ztKO^|bM*wf@>~lfxf2(#FuawSuye5NQ^P!B+yPNXw#wJ92|D%k6Fhh7X}>$@T^PCe zhF#v6B8f{=m1oxQZgFv&Re5t`Xb#MUW3UaP0&)a1&IwFK9e20Xp+mKz^m);Mu*p4EaPyWV3V zyX4!qGD5dLwK?y_{Lk+lr!*i#{*nn5T4JZiPKV}Zh!U~;!bYHOm`9`cQoimPmT?PS znle}2>PdAzLrr@Xm4;+PN>1^}J9bDSPTYH%>3h4zgbo3vh_SPZ^!xmJwy+XL@(Vf( z``%Q`&IM019490p=<%?@v#A|yaqJ19Ma#_jx3u=1+*!nFrZ!z)3j1w}f(7ORDN#Lc zUJ41ID!yphpA}^4+#KxAJ+u|5d}&;rL`Ye$t2HCd8(9e*N&*Td&buotJJhM 
z`?)^W#1%&vcPfh)&|9-cJ{(Q_=RvZ=vrdWAIW~`8U-kI|F6n$ReL=KILHx&#D~Vkp z7Mhn25eT|Gu2vN?w=q9|-ch&G&bw5hT*v6Syn9}CRkHVmKmlv%&rh_>QzJ(ahRoBy zxTxzMn+XoolT(MomSIvzE|>vGxMEK!o|ftN<-1mZkMh1rt@x3Yf=V&x-m}ro$5y6W!~HSeT})2TzNTBLJnLD<{D*IX zTs#@yll^DUT+~2>OZ-6qCWjlOyqqg0xT{=iIbtUvtB-?r)g{8QIKzt4MF+?sfcF@7 z(yxzLupU9tzb63iJAO_m47a-NyPdT*!FEdWFNh0v@c#0&@L`R}_Ecs&NwD&@e<~s4 zGjsiah<VtB4+Xzon7|JR!XsG0c|D0@_+tk- zi@{B_JyECl4j}><`0L=3#Kky2o2hVZ^PC?IGOC_Kh=6pX`~G9Oo{*3Gan-()@DI)#X)49eFKbkKp8N2{I>oCAyZsDGuv+-! z+>jHu+Zk?ss)smC7??%-Sx~QGqgfX6a1%;xmA7UFFyI?)>(*mnvzQsLJ&KmCb2ynh2DC)Mkv!p>{-FuU||LG&xcXK?ogLjS>$Ss>UH zDg&#^rS`GP(j%X(c07fMpXC$k5?5|Kstb{2Fltmn5irxB(YTNBf~P462p^#ao2N-B zIb`utF)XjbjSeNI_3dzI1WF$?vSC7=Vf5*QD6*fq#*hUBv#62j<5q+OyYXNGWY(8^ zz9ek4ZnU;1#3Yub?Jay5`|)|nqvN1GX2iqWqe7f>mT}VhiU!GN3j(XU%dA{|WXtJ# zZx)s)R}{;!HshfD-vRw|*#igqBI$CKeKpEo+-V}?CzhDKxd>hD7x6kM%L$Xf&KAGV9mpWS(^6*3{hxw@zAsc)G+@4+-*{DFslB!jZ%C7bfaqgDDN zgH=i|#mbS_d}3HD3DEeMjFmg5RsiNTixt{bDTj6xbb^OvoYCC2N)2InYTwy!4N z-aqV-K6qIM=g`SI?wky@)onOF1K<$b8Kt)^$6FP}Fqt|~vO$EBN0;K9nKIkGx}s|P z^6Ol^yI|{S*;-3?jH-bwTG5pM`lU5J9n#hN43px1rQ5WU?u8oFo^EDhsKjNuMk$O8 zgEG4Cpi1R1%4)+r>|~0cZ?x47RdIABV=Z1Sjk>Er2*t{ylGa#YxB3VX4M!xVnY*%E zi?}SZn2cv@?jpc9?*OS|B|=thJu2zAk7UtvE9f8|G_zeFgCyGPuD|GA0@% zTHP@FuUkvD_g%x+{X+CS<_4Eq7RL5+Jmk!NaliJkTQPaV?Exf_g)(~g{9M}D%oOmp zCLvwnCXhmAHp@+4PY$h9KfgRaI5haZirV)VM7#^H%?D3-0i?MS=PQT-=*I z!YD1|<^=p?GJM%>*I&&)&X=iuh_bicVDa|)O}(Xo>e`0(_wM%b>m`Pw#*6FkGOuK{ z_ADCWqJ!UBLL~!{Df@Og;D&gdCNHrgt<}Wbh!bjSh4rsld%6U3l7>EX>Cg4}NK`H~o<;Bi$NuG*V`@Rv|s#btM@l%2; zpY=>8-4;1+99u^H$}wgw!9Rb-v$Hc!x=mwRVGx|iJ00pm*`7s-ph8&d(w5P${pKrz z^Kz+iV{mz-7Y1^LK><(GQJaY$W_Y|E>ASnT-!L^bRglf0ujbQSW1J9GDs8lM2~Om(X9w{e~ymj)rj+tedn9Qr;*EiyegyQZE*0WP1myI z{Y|ul@_4-(MnEPxH@9Qfp)3#o`Xh~4C7G{ygt+g@ywi&N$r!w~zQ*;xU|fMs<23^8 zLxSd*ouo?8g(9STR5*dlvlD2nTy1`x<@aOX=<`|@5LbgP)WNBIr0#z~bDPPxck)K4 ztCH{bX20OsZ!-F;(@iGz0R&U$BE;YEJPi%H`G+o8RM+c?&*H*YKl3g2REVn@ zmQ@7hgoO32pek(oOW#kpAKG&qG$~LRhI^BC%^s-6+~2(QwbY>N>ce@=gp4E^!=KrU z;#DeC4EH2mAtNT>4EB{q3!GLx>{NQniedP%YE0=NyKmC9@MGZ;N_W>&@hMa+wCLtw zr|pb;Y8EglD%IpqF3BXfA)m6o8a4c_qN1YhXJ2Mp2q|;Dl!Yt4fcuHdi4Sy8Q*7<*@)?*mqmom4Bk?IOLBO#)e15tt{Uq=g^ z5>o!@uJobnlX_g$^c8evRKUe>s0hvfvnAL>jZe=TPLQs6Zk!T=*HQzFi_v&|J=ZWY zwb%asS}EodkKXRkb+d&a%R=S1&pd+gDxG6VUV0Sh8(qhc`W5CU6+CC9-HEGzYSxzo zW?zG2M-QUz0wPYlw57ktb`S!oan@dRTs*E;<{8~VaSx^ zm*w>JJ&mGXULCmX@-`CWyRdY=A=Ki*-!3TOe?9DX z9yxM%3-FM9g_7izn{?Kd^61J>=Wd@s-^F&i;Hu&ZK6ocojd3NN&;JnQOCf_@=`AIa zV}>hTb&2tmq8~Ft$T+wmRd_3FIL@{!K7jEflT&KKu7u%G`5GOhz#gjmaaCri1)GP$ zpmZ;KW(aDaguLHoGfc{x=l#+8-9onG^rb9zIUHQRb2+`IaF;Q5xGU|f)`A$^-0W@h zN)?nE&gI%na)z{`NEqXEQjF^@>MTTor4aRJCbhWd?i5*qZfRg&B>5`F^}5Uq&Kzs= ztF!Ril3g>&qCqb3_4Ul1ZXKGMu3!K8b_NY(1GC;@Td4XrknJ}5-Y zW^G`dpL%ZG68m6Ac=!}b_Z9vEGEy&CWu#tlLwboNvNipmc|SHbcGVli*pdZs)K(xX zYlhKkQZ49^wAR&qHr20=bI|IvOa9hgN5TQgWt-vr+SbLPf;PyXF@y3g9(!VyCLCXS zU?4|_=9CK|6|cZkiM0?n2>OwggB%g#0`RErnmeJ?;Q4SOIgwc^f@Pq0@Qw6)Lp_6w z%Xh->Iu#=Wn&pSyN2BYQw{PDv@6W1lVV=qS+}MEwE{))eN<%;0jJen}3fg&!r)hJ( zPjr}2@>C}L^5)w`hv2Y^0S8Hgle2RZb(Ar#ZSylm6HxF4E!bO`{dwJ(koo-BS0TMS zF_ErkXy{x&FFC%ap1`LQQ109cJl=p~;n0=b$NXA%BSV(OmkRF~d>aEbqAi@6yL_&U z*uCqGJRTP!_i}`0n6kL16oQGqW|F;y$(3Q|6opr5&=3?eK0Y35B?3j`Bt8} zRN$;d@9`P}R+ZW5=va_w5RragSh!TC&AXW2QL@1ZS$AJRc-Ey2THchYw^ze2BJlRq z*8Gp2v^YRSq$Gq~!dB4}f3pA<@2`W%ZDAqVzX{`dI!}UQ7cBGCZn{%&sH8rN3v#g8 zC7(mjbvX7hsGo5Xwqfl1*T~ir;u}>nP*i>;j^X_UI_yfJw|E|iy;eez4Za^OQC4-) zJ!~%dvj3C)E76)?2G{gFR|7T598%pj`_#i*lg~koI0P-l(iE%S8lX)cFcq)wN6yELP7lOFC zQG;2jPl)|p8goal?m;ftYp{fi3Cw^ah^zIye63t4-Q_0=sVfGPdSp)C=-818FoI|%MM=#!|yk6&+v&OV0|5Z)P 
zXQ^o5lry(hahBaUm2(OkWwbb$50%$t_igss{i3$R!+?R-xe9xPcoVu^?005f1I0&N1yOeLq*;TkXO-+Ys zAxBGo&w8(J>13c7;yRR#nd~2`Km7g$dQ=ULb&{hMk}kOnEp~6`CR=SBrb$HIYX7_r znh{uYIarffYw}zy3kHU>e?Ug#%aS|_6fss`#u`VLIL%@zjup4DoQLyOi*;V=SH|G; zx3RT|4fjpMHCpGpQ?D^r4m5I3&qwU$w$!<*TjpV2>2f$gyFo?!a>#+{{IGoge0ze{ zT{hQ<1j+bZ`P#5zdpIC~qFs6B)M_S|_sjz@vFfnt|$MQs7I zLKem_^1x4q60@S(WmJ_FXxp#*fKk^6)`}Yp-4v)v@Dg39 zr{Wj24gPfe5RBqd$_jC8omF{#vCa;d(po_)W+L1M7#%Cj@SYmTaR)}@cu?R@yPIuV$*tEYYw*y*qFaJbu) zJHuoQ0^nuc&~(Apd@>82C@uZEro_$tv?+`#PNtbi*k5{Q$>z8tf)STT-VFJx1<$$t z`pp9i(YTnxmpDgt=ImZR!|l^VY$^0!8iAD?b-Z2YTv7|x%PT4f3ydCd*|#lF>AI$5 zOYwDN7t90q8y0ww$_3nt%OU8?nx(<#FCPr&Af(pADPlIGWM244TFp%8I}`d<%lhc3 z`@y?(HT~`1*)OfU_M-|v*^g`(g)jbWxY--VmxjBWqtCgv{CwMc8rK%HLVw;?yGu>( zqlo`?-{|&2tt$5-0$nGAPfzLfRz8PPaJ4o+XC@qhAmh7TtMz`kYLDh;VkV0R^Ljf| zu^pj^N7pEI8R@j1R^Pfc`-4_OsLef)ToP{nqhy}f$=qPd$`)feA zi@v+Y3WZFBkN1+**yn#Hk}!ljr8>8WG(gWJaIJTq33`D~W z*ib^VHLvBDb*QEe~BIJ;v4isp9TQzav&?uTq_X4_2+%3x=Ow(b5RpXmf*=a%$%)cTRo=D8rf&wf4};%nGC zP`y>O(pfa1Yl#@S?R46GR_acM_iB`b9e`%aaihGvycMuvvn%?AU%^ylJG-eh2Lnls>#j6GQPZcQ`1EXy-rw(!f{2q&bVmip&n0juDv-!Gf(DD0T&dWR%Euu?&ej8-XAAH$MS`X*+4vn81CP7lmC6~p)H(p)DF;i?E zF`3T)c#$uHRGuo-wJ4F#HhXDtk(<*SIUcm?K+RzPa|lxJLfZ|Qb>J3rF^*)|TZyQ{}ZXJjF77W13z zZ`p^_GV1AxxubZr-q*FhA2~bf5=+upULU99Z`$408&r8&Xm_i15~Z2mkn>gR0g^}N z$={Hk)h@%*ngr%bxNOh5msoY=tx&t(|#Qy4+yjN%6E@kIX z&-k5lt<#G*wlYw&CND{gXe9Uzq$f@I9C;Nu%qbRF^l@HtUC}Q9Ggu4*sW8;2Z-*X) z@!)42La{FBuou7YdAKt_Rj@d#T(rh?D&uXGyvt+mXjkWxp6Ask&eCr9n&>kDC!gGBI?UIT295VxZOTh9Rhn;Dv&i_>Q1$3U3j z@!yUC*ZP0{HklgJeBua@8nan>dhiN;n1V2bT6$=MYQV{0N6Fh!yZ%}DK5|45w5xxB z-tX&vR95w_7(MWfAkMVDJ2>qF)q~!a8f=gEKDk{W>KE8JA(ce4(mC}D4mWI>{LFja$3VPj+^x4)V>Aj(NNbdh zlftg@`?Nw=#D~XS2#p?#w3%BZpO;llJx$VdjfbOaF6BW+A}B zdiUG^B9QMTwgOUjV3x&pygEiLtf_GL7}YK)66&F!ajMKd`wgJ%vQB`XAohNxYkmyB zf$aSkTD6uqS4oj|;v7=RH>Z0HcUz+?i}llPVG0wGi5m0!4`3z4#N?k#hziL~tjp1a z=PXiv^JE#m&GR>Q3vf1b*5tWm=dVfoZ7&qQ-O-Z3;Grvx9{6c8PSV1B94~vvgU{$& zh$kK{^cZJ#gyq@Y9yNGR5KI)tCI|u73WqXz3I*2WYkMe-2MPVekje9Nku&Yy|Kshe z!>aDKZUIrcTSBB$l$4YgDM19KK~lO^q$QP*RslglKu{^^P(VTir5i*fB@~cS1PSk4 z8+^|>-#PcY-+i9@-2dLkbJ+X$TYIg!<{V?pF^>mgT{=t}R>Lj7UuN3dQO@u~znx7^lfqgy)h2ZC-{eM5ou27*Czt_Q0y8&x_LCPOxE6zy*9`|wYy&g%OY$P)@}uiaUO%ThZB);*8ZS6avT^r zo$&#(4-HpWA$iQLl71T<3c;p6H*E|G%5*Dm$nC);Y;*zM#D(Z+38{@sC@FJpmzMzI zztbPor-(VWS6!VZU!mS6?}G~=6@5gCsw3RzrV)oB?vLXmV6X;mM;un1F&Hjr2~De0 zg4)EGL|7SzU;l+DGcz-!kM1XKqE5d!3$%#QFBIzg=Ea=gYiig52<5~XLm430K2I~N z69>yP1%3PWP3wEObh8}(CSpp2_b#M}5@2_EKC{@byG~PdCz46t=)Bj6@0E5MM+9Et z6X5Vg96_UCcF>Wlo7WD8F)tY925p1u0nljlHwA=1c}%$tA)A@2(pLe|mC@+jzqg#( z{;+(5GspgVEd-A^?%n($ERarR9~ojIu&XvV#9jv1G;t{X^RBq^k_HV@k~g@(Gb=@d zZ$Th~x02%UN`n059a2W|CZI`+wkjZY!N zNF$UES>g=c=ZEiG0Ownavd6|TMd;}YPD%&`x-BJ7CuKUblXLrrpBh#ED4x_^bSOw| zRoZ*a3b3J_BCfrlDXjaMDA))q!z0F@`dOA;4acP6!V_{kSvW}Vg@-nx?gZh=Nb@)O zbE(IU9M?K8pSI3c+1@O>oK3sh*<7$I@FGB2V zM0N%oYGW(ix@q=q{O*sq3NP}b_+Iauam%5n7Fds(tGDSwb*NSpzlY1Yp2wv!1PRQ; zI~4&iDkA+r^IRKHpPb8;^ttEsdm7?It3N==1pz+AyTQwZhQYy!D z-zemd&`R+|u)++n54uxYzYeqcJ4TjYa`C9X#57o4cXvExiF^ET%_UNNFY&2;GSCR; z^Vx!#C0|Dbuy*}?-YADJ8iAOeb0*JH=y4KoWq2L#Euj9h02t;+XLG0jVUs@Pie)?B zVu^TFnPCbC9baAvTgA&Vi#ENSlBofN3XFkKal=6HYvjJoVEGHf(OihAdti3B$ zS|R{a-8p2q(Jd7H{qbgjz%0qPMm*F=K&Ma?M|8h1XS~d+nZxp1 z?QoOp%Q$_kg`!JjEk1r+bBZmoMuX*V3!@OHdOcn9A_g$BI|8hahRCl^xl(1T12%=5 z@ks`_0SqN5-FbI3$U&%>$NIQXIX>m*-cMq#L%uHAcn$iFRcRG|E-JrLD&z+G9{&|` z>%sE1$l^hu;A+O66DxZm~7+C81^M2@8 zid9OnxYpxZw;`TfcyyWqyO2f-BytaIib8=m`+kO*jpC0a}&!ExGO zK=PF(>{CU?Z*Dg`aOo%0w{z)JaxcUAUc;@zm*HLl1wP8(Kw=*+U(P~ zM(Xl5lL=!fodsnDKd=i1G(k2Hk_4iJf!O;Zs0gtN4#dbZj)vf4Yd#q&0)ZEKw^wWc 
z^Czc|yudl>)f<;PB%nU%kIm7c$F=yex7o0a(+rXD;3)q6=H~}^^tk5c|2Ww)s8eo&Wbv)&%OIARda=uA8$VJ}2QluK6U%EUR z`oQV3mhW|f`<<3-{tw%dM(%QI9j)s<3M^H74q~$cZE#Z7h&mpLeTdo5_JWQ;;bVto z+Im4ygN@>^OS2Qyp4@zLA6+QxFxhi>Brv$+wV}XV$fqFS2IVw*{~@O#kx$pDzgOwq zy56)e^^owPtb6TCvMm6r4@hR+F#+TMh+0L0et;m$GR&s^`#~*1_akLf5VRRBkeSJN zkUoX&nam126M@9g=sA?;=$HEP;={cRY0j&NZF^aQtjGQhxg+@R&iC9VqNAlzu1C_2 z7F_xO+5hg(0l4b8%cWkFG*k?xU;F}wq$9MYeJUsKcmn6g4nrGEX?S}_3* zxt7X-q@de{g2B&(T;A^ovB!Wy;AX*XN^3vvz(6E+fD@w1k1ZHb=r~iTL5``ijOv&X zJ?*80(I7PC6*bVS0Qt^-@{TW zmSK zw%(8+qlK9`j8qY~v;T|AA`rVA+e_?G?rk9)Rg%uE@Jc8NTunkONO5U}Or5ySKOj$w z@0w%o%;Lekqg4cwMX1Z!dzyhPVhMXBOg<8o{Qt|<}4%J*NSZSYs0;3T+jlP$o1)*@VH1O(3vsMwcO zWWY%~Byh(^1@XhoP1Fpx0+ZNmduc$LKm#?X?zct*s4xILyqU1D z3QmfSPcYpPm%WGIz9~X!ZV;onLiio?Q-9fFL1Ca zq-{DMG2HQ7Q=ucm6{RRzClD4j8gTH#ez4S&Swr|cQqVFv@H)|P!vVCM}tCr zn?c~ytZF0`P>(n(GDyN$hYI5;$;aPN2zwr~e*xa8%368`FIxqn0h?B-mK_0W>IcBu z-P~Q6fi8+fkcLoAegL|t1MGzCD%*pVz_J;uV9K z!~GASXKjD_(8Z{4YlP^-X7mR8<1Q5mLnH`?b3u)6OTy67yY#RX@yald{uNc)aJvzi zc~2+q)IiU=4R~%7WvE_yKP3?amMRi{jw9gh(?%@2`@OHDZl|s80SHYKnbkkW{DGpt zGrkA8pjrG3H~U??1Lh+SkqwIsSbbYJ$1qC|wx@`-F}ZjDW!=(S0V*eFmTXL$%w2>Y zs3zi+t3VJ^vL`Nz36#KMCH2(^B$6-r_GNilLRk47Abk}6zcwRrdsPZ`D!+h2Kn!TW z`|(|BpeVeT>}~%zntQweT|*Y;N3aqomZ|E{!`v9ztQmzW<`_BNy&jzc&B<~aNStPyMZaxZO&D(lDK+HcBA zkDybgwjCHK!W&G`+=UpXlQ7V?4t^U?!fn8%?U1QyqsLLe6Tf@k0D}lp*R6^~^+z%E zry9Y+glZO0{_^S$V4^#Yfsj>a{6gENv6}=x+KVGlSE6F3;&geCvKT-a*WFJ4YmA{+ z#K~!k0N(39LUS7Cfy^#(%bV0UyJ`J7aRkVvmu@?t-<3dC%Y+~8jlmgjt_M*WJHQ~8 zB0STNACAHo5nmO9odl_eB z>Mx(zz&Y|mfA$FoT0F_M&9w$8Rg)WNYu{e$PX9~UKT}=M+Gz{6Yh(W9m%=rvx?^mT zKHu$1p`Byo%^`m^<29^+2rO??Et`3WQ*W$Od9{Q7LvdfFD zWfaGG;#HE*hJ0{Wdgc#h z*%lxw!T~V;`STe#MVVJhRzV~|J;2aD%+G)CDYJ0shpO_&_Fv7)gzUq|?EF zR{4qc`n&|{8TkL25m;Ftc?a5FYpc-9+16irvuA;RlYlAdPT6c+o9s=v!4JVS+yBZ1 z;8od$_;M=V734#MF0XHB(&!%RQVke~4qOmof~KLNcIXniH2k8%(p9s}8mFOa=l$cI zh)?G~aj#bQWE)NEPnG87@jq(#)y(Mn0I?|KPe1}gX#14bBg@MgSS4kfv5k$5r&USr z2we^gz}4xjS|hun6p5-jSA5eD;jfX=?e*(#sfMBc=~ri!I>b?hf9v<{{K^yKUC5;6 zj?J@k0oF%G!F4{7%*Cvu(5y}_ppk%^dXLJ{dl8H&x8!*UtM#SMs_Z1U(svN#J33?wPDL?R%r-_Mp$69$!4bqO%<_dyftj>bcJ85=HBK2 zb6rDJb!Jtxzbgna?BB+Ju>s*XqmUX>w;@%Z(b{ol(dmNENcG=hMCX!GfHp>)ERNAr zbK4o(COcd?h)`VQUkIy%=bVzw6HTgW-6j>-g_Mihpu^VzMk)-siIC5=!|FPcCCfh- z`5$N-!1wOUtnHa?!;H)BGcZ`-B*=n|tN2yF@xP{L_nJ-JY9RAB-$Oj4+pF(O8DT7h zu0I7A&wn9Yb&Q(bB-x`D1JO^FAwbRqHkF~S+2t90m3eppn-Af@^?g%Y;;!s|I#|9h zk9>+SejRCE)1fP_OxQLPZ!aQjkrlUWr-e6Ci170qj<1l<^3rcP$HPFH^k_&edxQq~7sL zdbCqRw@QTy>crj_mP`I(2=|^8{IaplvDeYqNL(Y}l`MzudAmoD0XJ^wJggmb(Rkcm ze?87DIcN?mG802v{Jz=>k_&>jy~!SV`i0_a*CJM@TjPyrC^p%wyOhEa*b4>>kn*f?xSltt9HZhW--cex)B^eA<<+bJG{5JfG;7 zzjZA)xKCGy+m%fm`OfK5a%$4XZ3ZvEkZKvhq300k3tI+p;lP~a38V7Y3{e+Ld7TMU|z?^DF z34Z?=7TFP8k?F>1@CwLGJ`>-XNL!x!V0UWy&-C!R2xX-M>yJ8GoFioh%c&w!H%Jo_;SRaN+7m zt;TPZ+4O57eYo#1(1VI1@tnWHkNQ69YKF5Yc~bFD!d;E!h3AG9Qnr#{b@3k>$y&;b z97JuPymSaQCcRSq%KghF7`F7FS#F@ zz|?tv8z!C5CccC&+72+@ynOn11Ku&wOSWIuAo|_v>RI^uRaj!o>k7|FwPZ9Qfck6Z z%yP~8h4rC;-Ju>6Wt(~A3wPUgn}+#(PiXWwBr9fD{G!b$8aPd122aZh!`a#@T=spKe;+kUbwC2fOiPw#Yzr7@U8b?wjCcd6N2F!Wcx|M{ z)7gffv`zwQk|FsNzxpU~^?bfOFZ=JMvdA(Cr{R;;?`>Q0?IOofc8KN>kk+!6E+B8E zm1B5Fp%`yaiyRtczQf(#x%1X+N2%wvs0*~g^yGbq>bx6R?D2)quJ4r-D!lOR?R?D5 zs+~z*%ij)*ulz_m5*Emh&BFIpO#tQ&2^uGUBh{P#1OgB(`j(JL^D36nPQ8rp&7bOa zW`x>0i|3ke9w`mK^OeYEMKbkRO?icz|K;6*jl-_b1Ru4j{X$QPZj9OCq-6~^BJW?1 zX#1WhMbgq#H8DXGGum?;iu-xnt7wQH{I=s#BDowKf6PI`Po_sJV*N5#T< zkEoQ|zXcHIV$?hw2WiQscwI6`gfoni<_mPNtBo|DFyYp))>AtPjZQ>U7}nmdxQJH+ zt8O#XnxM>5bLKz@al10VRpd4JDn?@G1%S5hP1yIgkYAv}TVL@|@YY^JLjSp2BnH@r zS-Y`0Q;XizwB382G^{1$$jz9CtfHgfLFxsrxaEvw@Fg;(y;ARB{DmN%o 
zd13&hVkn+hqW4St{X!CI^Z&k(Jc5uu(O0-oLpuK7)R6D6kSI+#3D(Vv1QW84$DP8# zDE1kMx}W~ z?t3%{CHHB7HtI8Z5QBsiDt{_?Tx=mu(hdU&ryaFQ3Q#lny#jbbKAm)5WHLB;x8iYd z;M@exLER*8X7>2~%(V=YyNZaH#?;f-cQu_0j+-9_3*^C-rwOx^04S8ZK!>;uqzItv zzRsY{2Fn4hFy8eR-*44bK*RnTwg`}RPd{zD3OL?>~PszW-v~vqC1nR%!L-5`a1+0O~y0|KD^g2BVz!|3K~y56K%o;$pE+z|yQ2|A^#z=f(^Igv z;v9a8eeyz3?i~4%|5DG96Ic;GdN6r{b;5dhi+5gX+LiE;Djh_)0qFSAsV@A|jOjH9 zJ9)6bL1}`+5oh}9@Dm3P^Kk`6!A^C7Oi_cM$$>S8FZ&M`nz@3tm^k$8ER?<+Mk)-% zj|1EMC>^EAoV2jpUPJ!{f=vK1J%C4bqH)8=kN$PB-4Vxe=!F>Ewu<%ZBkJ_Pk7mWA zlXmYI=D;@L;G_ZZ@%P3*(Dp2rw->G4_zP=wKx9~#2z#oMR_*@kfwBWNKo;Nv_8k5B zd&#>il1OlKAe(2hr2YGoFd!@gQTzQHNt+tUKoc?pa3huGf@uCM-4q*G*Tq@ z+JJRPIBtQ5xnY{fkyfLx{6IT0HzD}6zGYCcTyEeKCqeITY#df&a@hD+JU3UYt<4J z8%6GNj|*1??jYVOP<{|8rAua$JT~aCyMp8nunL6Poakxx{7_;&NgQXDgJF)b@$Xaq zt$_H1@0IO7yx)-znkRpYrCAY4&~uPF{W?}=hm)5YkbYq#j5a>LzOv=?rD&zH?qSX^_dlKAMy{G` zgs^W;-%(&a^as`U&9(3#pEf)aEX-*VMRi88iV7Q8{nu3Ck>gjVo0CDU0VRs(9IEZ& zq;N*|OowK%oP!@=o^?|%Zbjah*sl*77a9v z2Hi&(ajtk;#5~2#u?P5EB|5%i2IW^0E;Dm?>hL-KtjuM$FVuD38jW$Y%YN)Ms_Rgy zx7s<#zCl%|kWrCM`P0Fcs*dpszBHp}!s(ICgjGo=+s*)8EXFa*3f*z?1ZOjgSmlA6ff?4bo^u_mdVjN&?5L+JQ9qysDSN? zG8yRD+i4u>u)PNNE7P!^niq3!IuGuMGKxU+<*NnaX#?!R{b%X0$*_fvN?cvl;`38% z(>LPw*I>bRz2aVWss4Hd2pQDIO*b$YiTr)|KsE>Ih4JdnRh(mV70b8-EEw+sROte5#p>*xMXTjhP zpBFW=_z{+H>vO=QWGb!J{B+{6Rj;gx(;U3KMTCuacEl9EtYxj!;^mGcHgs z4|_digg3+5eF7)Q{0b?+5=C3)c1}qQWeWsBJ}sqcWVU3K`g3WzxsK^~j4<+a<8h}Hj7NS@A}Z9%iO*9pDnK|txBtMPXoU{m=F+IcJc!$>IS#p6H2+ClO!5j8 zsj*yt(~HV{KDf2}IcokvI$6p9>C_7;KFF}}y~e6=Fi!hk8vA0k!c?<{)3`N+=2IT` zR+|3+VFldD2@ZwS%t;1a2Ki0uG|s;;o;=7t~&ZIzYWp~_&M!?tW2_5 z?v;R-w*4c*5iPE+{%EUx_9MvCZp81)(@0PKg;`vbL2C=|H%=1- z15O-6?eqcE{Ki=*IlnD~V>+|RFRjpM9D0ix9`zDVl&uamhsh}&BC{_BOU=82(( zf!v%XB;LPFxhUbFh!rmg<`F_>C-zTY0vq^0eTgaBfp(=sj&u4a!8jD>LDeUYnyWfU zW(l$=I9?)Cv?zu3g?LmRCS8Eg+HNG81YM`|D(ePR!Cuj~hW-@@g?6~wQqAhWpvaE0 z9bqTkhUY9m{)o1SG7<6L@p|*aAdjvX6Ij>K-G`X}Ys3fHnOG96!XeE++u={P&1lwK zip|t&f!2nAV*ViYbL^U1Cn#4a#~h7xh^;#c(yk;MkVkq&96VS3VOGPW19_IXF!U^2 zF*+(NMcHZus?wl8`--M6K!+ z1-m~Ne0H{$`O`cQ4g=EuXl4J+=tZ&#>+k)!FMjynp9`5cG9M=TFs>F|$GKF!n9VOj zIe!$7XPNcsJJ#k>YRP*q;LUU4hse1`*9<=dFB?dX*lfi!_%TYyhE+_I1Acm+ceH}D zd*{(Plzi*L#jo>{=nCXi)D&PQ5ijzvn+8+dPI&ESaY)cm?==g822zmFoWf=Z;MPd7 zf}~V?RQ(mIN$bym7=rOPk6wi;qnYytp*L<#>#DxTty`7E1k#u-PTzH3inJ7d>L2iJ z{nc@#@U&_Ks2w6>5H*2Q62WaVj=%qXn5+@&WSX^r{Ff7OtVn=hcfAbLzTx*^?N)@! zJ%h`m)*9MUB-6PjZ~Q}VFm2e3b03KV#^8=!Z>FI~0(C4L4Gy?x9V~~8^=M%4IQ-L> zz@_q^zNCih4{py0Rvfhzx>0?JWZnR>$>Y}0^CWo0ujsM8@rM}y@o?m7)PsM!)5;L~ zVh0tFOn5E4mnl2uA(#IcPmG9NX(nD?>~o0&OlLZIBOLQt{6S&I{D;p&H~+q%dGM>! 
zA}a4N612lxHfyIKv95n`ux~+HKEYg%P23ANi4#`XH8jv&UI7>N1@`A?zZrp>M4>=S z5MB59c9{O7PUDbW^e}7(4}(FmN761fcqKZ?>?@Pi;K$*Mw>X0?St|lm6>TvZB!aSN zrEks*kYXQ|KNt<`-yec8h6rEH2V)56LwR=+Cae=nKYjtZpfmIxA!{S!RAZHCOi6h< z<Uuw56wjV&;IE1+4yP>KP)T?*K>-0 z>F{2&p?Z~xIs%xOG!L#VWT}LAO6ka{w7;__ENsn$>XQ(~R4K66GN>g>2~2u0nNJM& z{>Fk#1EMx6;9*Uusp3{WVNUzg%1Vj1wONj6L+-GHl5RED(i%g%7RIU6=P?CXtEhUl z#T6Zb1Whe)^?c6sNeE|Jn(%G4L|@=HO}~%9J01QB@7!Nez&quk6Y|UABC#(3i%U0` z8MV{7+Df4g4edY2yDs$MygF%VR?9K$@?>Xxsi^ zvb@0ZQH9?--}F;Q*e2e}=afa>s-$E<`9CJAH^Hy?U-I1O{elWq5Q`wmSULK4s%qYZ(&8T<^6_xJ zbl@TSEXayJV3E$8MBjev@1&}HZhh;PQHBxb+U&l7uBX(>Bim3-)+qj+&mh!t6#_T3 z;$bElaKinZ@UCdz1uzrcLKi<8YN8wXkp98lvoCAW3!(4AxD|nW1+;Fp6|H}ff*_^0 zzlSRZUfvKzJ|<})PtN_1qBiEp6jo^UK!2z|T0A4m^np|6BVEZ zklcIJ^!fr+coPEMNK0BGDtqKvCDD!YS(QUy1b$xQf4?4H7~wFAJDq&!R1#sdeX8aW zB!ZGu8>l-ps!q;_(^%ZzH}oYqNL>+y8|fh9Ip7hZY#E5OUgd?>gO3Tt>rsSqDYG2e z?UY`A0K?~7nP7!i$QwzO}(>`1>TXeAE2QU2Wit<;0(_MH(a1+c7t_4aMo`=G#TF> zyN)SMZvG5-1pL5N9lk~O$pWmMV(?Wz+rg&B zz+H|pnkp`g=h)RI3LD_&#q>A8Gq?8<^lXbIJxf3H=i)jWvbI%oCe-4b3CN!p8fQDs zMb)7y%o2(1EML{X;DSST9c#ZV{X;kGA6z5D?cQwlj!G~-OnK-Bvm`oTMp%c!*6*Lc z_a1M-AX^>K+FS#vNHS_O9=PMy9v?VbGSJgwVS4k2?lX~@iV<+BS=lgpM#Veo0q5V@ z^RGT)^Bw1-tra>=n;&TxMZbd3(SNdA?9Lzp+Oe4qqF5k6z9;PS{))17emmaZ+!D=b zn+Bt%QxSlq7gdtO&J{%(A+}tU6ldng z*npks#@8^PbTr?jx}r#sAM7{0<9WC%9q$b5ze2?GEE>i1IEU4)S9G_v*-aTU};| zPQ`QTwU+|ESS}>ka-NgT(<>kh_^iYV6I()%67mDsBE0$K{GnHBs$#kBw?C4UXP#ZV zMCeUa&~I!n3HWda*sp23dx1-owJYrjzWPv4To(UL3lZ?&76Dz8mfz1#dE#qq^_#{! z=XILRo~>lKMWVvMDC&8}Y}w31zUkweAqm;aI6)yZh1)C7nzeZ_C5Jjn%U)%>Ji%Fg z;dZlRCUot>EOP+{HPFTa)i%G@7#T|q<(Ua&{kL5kz^wxl;n{xWgfk`FKs=ull=Tf93Hzcg@aFe84PF6Txl!m zGs6+ygYL(6vqQ!05m?hg(}mjgT2%$m6ctSK;)$~uM|L!(`X^xAiWLF0S=GxCSLqbk z63UF#raHQ}3~s2Y&z=F%gXbw4EpCP9XzvJBLl^|*O09Fsd(^5)jUGai@redCXl7=f(C+nP{;H8 zXwV>X``-Z>ib=U~gH!Xchcz*Sog}y$wRYZwIS5CjqNmdUYFckUPLlFwvu*REIsR22 zB7%Q-MA%!DCflq1Ap>7u_*Yy_T|bZogrC-pG3_z;IICnu1M0Ey?lV_f-VZk{!L96S zPV@#vQ{!}xnXpJln%T0gTcm4Q>+lb%3#T(CJ*F1#89H9wepoEz72+gUH6vberbOhz zeQJvri8FC$NqI}Rvy)77XLMS<6ydnB+ln?&H;+eqat}!P3-5^^pMMTaCxSn`?-b`6 zLe;)XeT-vLaJGRKmvtoowO`S9?R9zD8{9Lr&VH}GQd3dn2x@K555;yL-~b8=NVtyG zlw3nHPl2>5W9KI~21{6E&YY04Ucg&WA3@X_zvoQsf)cVV6}6DYK1<)Br~#vyjuwYXOZ8O9T9>( zC$LRX@Dg5e9NWruN zv!ZxPH7_Ji2mE>#V5Fnze%6%fIP8X3?l}ZDgnCOV-(?JcKdjqg6tQf=r;;HmnwKdS zsj_A(Q#bv7Txc{-;w5PrZmClvoy9wK{$Rt1S@5S8Z*Xp<(ImRnvob^5gCi{N1R^bK zfvnfZ&xR^SCKeD8ty;Sym3?E9qvFrvZ)X5nnBnB2^cDC;aB?qkJ8rcSbZ|yJo|%YuBY=P#+xhoZOPO_%n6JfrUfYh7BUXhM*Ld4VBu2@7 z1&mIRFY%4ZZ9~$fBl-nG=oM3&rNcPplW)lr2t7SLTSksoR0|xcZCChp15CZBWy3C{ zos;lh`|P%nQd2e!Vsk-pyOAwNL@_9If)SqvJ>hiOifQQZ&BoCGOq~1NZ+Sn?uy#R4|o!9CV5~@EA zfZu*?&CR^{3aF{Ae?&T;hzySyoUPLI?lj9et*^d&E$`~(n!UjDjDc9w%gWfXtDb@K zlP7V%a**#bKI!B;EocpQF%#2&&cuA(q)q=KF2cE6qxu&vN43e)ItVN~T56S6Vd`hW z2#L33Ga80a2NJk-;mCX=#;Yw{;73!w*Bi1{F;L}WHBO^{`C^~3v--mQrK)7-OLMpB zOzHJyJRYAEC>Okd6}^7W&G)KPYPR&Wsrf3Jv{y#|kr2=?UbjopT*7<8dxP|R)FJXF zsB*h;_LNT)xk z*~h0*#I{z6J9$Lc5#!-JM^5zzyTiCR6aaovP7|Qb_*mB0BfmnAP1A713(a*3G8&jL82mdrE9Mmg=OE`Bg(JT9X{-fZsm@ z%=DB-Kk|jpMcq%}{}kUg)A|+3>eVBQ3vNNZOQ|wo&Nb~y5|>MDu~UE(b~a+4-j|?w z#%YkF{QCDXEUe$ZiKQ#W{BArRY%-E+4^RjYZBa>ipwrUT33@TZYHV zeAR98$;VwCAu|6<>51tT*$P&Al1xR>=f!S3aush+e;vXgCL z$j6B(mnzM>gsyJ!nUSI$=>@VAM zo6NF1Z7cou*LNrVcQzY;S9q&%GS9UqQ=tKs$y1Q@51g8O4FW&&y4{Vo`*Jxu^R?jj z*3zn4y$YIsJpO{ROZkh?!XqSj?&-FA(uwF90!gsKd&pZGA?h|!Zy*P47pdNtPb)J_ z{3`8|L6&sBfu>*AUPwvTbiR8~&yGV$a$jZh8aLsRp}&sL&T0$Y zBkXgeY@;7h(nTm_RQBBIa#IUkv)yU1j#Gb%v=AX+R4NEe=OLWaW#Zr_-@nP|Qe`2! 
zeRl|UdicytipnKyyYr%O=;}b4GZh9a_A3?w2>D|CXzZjIeL51OiTr z^lVwjoeLzDN%m2{o3bGhp;qBrR`=obDZLv6MQ3zO(t5Ao8)liWzixM95f}(don?H~ z;m`n`4V3;hR53UMsaQ*fpW=t|>jj=np`xg0C*b$?_$gbh{+wGqradb#WjX69bT>=~ zL(`G+J?a;QFK1H>Mke2c(WTi+B8`YZEXYnzs`uW#`T`PkgVz(-wpNKfH<23H(#QSl zy+#OHERkdt_7-3Qjm?yBkvW=4Qdp2+nTXYGd#Mr+{PYFJTX5WUFDQEyi;in#fr9Ome+b5YBJP!Hgr8R)A0C8_2_<~|`=cE|(Jnp1oPaX@_9b=By_-g5)H zK*#omE7E?XlAq%;cP1W{JXYx{gsIP&n^p9pc3J`*aKSLuY|TC_uG)wmG7FGBwFJqu zy$ZjtfA2Z&x2>9YOP!SC@y)3U>Rx^uuM7C9GXm3%<%S`Fr0TPDzub$jOkE2pMpG{n zBuW%HuWN5T>3er22D)I3CWLo4KaP6M5@8RIzrMNsi;Z+<9vgdPymhi?Pp>nEm!F%p z$3olNdExP;qh|JN3Qy2WIR5)fWIxSq{1Rec>cKNK8Fk^y&eyy`%L`dqeoMkdV`HF< zr|LVt1d+={MaVa;?8qo6xsAk4UgFOT|1}7U&rc6_Dw_?WQ;CU*{XWO{&;XH0jfbI3 zFc${|x46hr(ozy?dRvD*Kh_W|W-sCI_mx_&iQn3gNVSyoD69LjxS4eghmu)5VW?t{ zH+waKOGtEtD4;CJ>YcoN;n^p@ek{;6b5s!5)8EET2@m`h$17{eonMT*E$^H7xOpq& z*pB=SPfu>W`Oo(>^+20t+EWVtWS76>s4M+!zs;iitZcpGa%c;LI@N-jwN|B{ZpAQ$ z`Z>zcouSj|tMvF*4Co!T)@M7keV!1Df#rvR-*zaA(o-~$C;Qps9I!1bSW$J=%V>^f zdJ%RL$Svhv11J0H%An!rZW(zZXXWY z5I9yq&0hXJDYl91>9yBT{1xAU-o}>^tMOgXkHr!h%Xn^~C_7=nXWaCRru6b9>+2m7 zQa4T;oyzZ-O0hNaJFo6P5rmhi)byfwTHtB@(;4lra_(`~CSavgGs%oSY<3~z#|6Po zoSr?dI$!e^yMWY(R8(>DC-7Al$j;sBNjiQZuYH9eOU>Qf4r7W6?aS#SS?aE$w6T`x1T`Uc&;II;kfI1e{H3Guyq72gCkh>(|j!9T|7WV_9THg(NLBjA&P`GA?Y7{vV_&hBNRn?ju7(Q{6V`h$8%*f)L-*kFrOYj8&@pL%VKRrD%qec#n4P^KD<)ab#ry*M zhX&ur&T%;vm8wyj<1d6y`d=aF4jMAp8{kXbkoXj&vTa!ckAjxp}fb6icGGdpxD|$cbNR3DnbDs`AoKiq0hy z>~Ag!pPN?q)l+-ur*{WS+^}1^AQ#{)$x$~8*zVen1?7poF_kNuzxT#}YcJ%NUdRzR zx_3VIkv5pEy2LXLlTX7FoOq?b5DE8g9@N}YJ2j9IepNa1>*6~Xh~4^N;_6%wX}d?l z7T9qil!TEs@kQO3geoB|e>~Qb>yI+!ZDZNQ_8bq~tRF%SD#4PBIPMb+lngP8D>i{m zJY$p-MClc|J?x%3#>U15??6@HU~E8h?U`&Ij-t)+#CkZ@IsR}R(Y>z&xa2uP5awBg zRMTm3=j-zh>(27t##$<*)|k=0U_Uk70&2mXno% z(~~AfeTK;OWDvv;jW5iuky+i^;RDBhZ?Ue2q`S%YJG{R>Ff&MrK7tT_=FfbxGFaYRx&i7v)N?#CcQVt(a^J)4p zK|eYj<>{?5+2rZF37I*5t-J+i`g?jZjEs%XT9;K;&S~?KgTXDP`ilr^~C3wGgF|BFcNYnrwl^f>2+ytgP(0+|6Il$7+4^pgF~a*{vSi zeO~GK>MK69D1^#3Y`0)h$d$w*9xRfHMP!QTXAN&p`CQ}) zyBS2iP<>aknZ)p~b1nY?apZxnFw_QlEb z!PVFhf)_?Q)vxL&k0Z${zy0tK=WIS9+%|W0SfdL?y?baORoRR+-w#??1YR0}8|{ui zw5s?h>+gT~y&3!4t@A2?+j=l0udb1|yfY#>5LXTTbhu1%tu%1oUd|DrONgH@ipMoY z_iUzMchKC*vHi`!8N|iTx^!6HchHKY?0ufJ-=>=w$bH+X#Yf%VxZR5cQy8J^N0m8( z@?j<0)b6aV$p>yokQrGqtFodnpW#%##!t)aMf&UD0sll*$C4whUvr@;O&c}z5EA7+ zyKP$QlLN_xVF8?TChw#p5kGy;2a?vXuv;Bz#P8lC)TV#S8AM0`fm+v~Bb4jOp)Tw* z(x7vqPryxy3M`|eT{65UrlpsJs|rFV_X+POgoFw>9P*%WnTOl4pr9ZlYH4Z7w2g2w z>ql4(UJ5HKvloF@uS@ZphsvQp&jsRU)fU-x4cowfsivQT_i-flOjJXk(2IhH>OFlU z-@qE!d3Jv`MG$yS4Ga#_wu;%b(=UI%cOniwMe=uAl`ZsflvR*v4?qSo0A{e%<1LrU zRbOI0d;^{B;qm;N57Q5I1)ajY#DFwTuPL-L2(ZVAzAm%1rfzM4Z|2Zq#{o-i9cV!% zx(+^?SEBFOR-k{E$5B8Jh2OwubRM=;f6G74Znn^vId1xLaKk4 z%k^x6@n4gaTfISZ>30A5FdoOIBv*o5`{LKkB6wy=Hr9eO92%zf8cJ+t@YqHSf8HJH z0#EYm9OO%eG56o+!b69%$>XK*+_-1}-^WB@9>Am0ny;YCv44QN4&F9U@eo*SFcB#F zCbC(I;;OQ$;0_;ajTg?-qKQ}vB9*~=Tlxl+e;45=sNs_Loh|p_ zVS)vTA*e^a(*`#@JZ7sB^N*qx+k}|)ruodBFO=t|`87=Ao*fdZ-F=%@^oH?lBO_XS zg6=c|U8c8O@Oql?daHa^hgAb{Z&5=29e>N{KOUp*{Xq7{sIs!s*UWcBWLZ}c8=mEw z8f@}Me2D{W5D(JiJv}RfN%nu79c@{hUM5S*kB8s@EhYSGQjq`tC{G^rS%8v6YNh7l z;lr@htk{&#+0d!u)2nqc>pK74eog zz0d3LwLgcjZ)@Cpj`?6c@fS){p7=;FU1xZAV$iuVKFHeq}g*CJ%i(`46b+ODdEM5?AN$X{oG}N^*U;aiHLCfOfdy4vnG+2{6bNN9|NkV>iU z&IkpZ^V@++@Hvc3_~!?74Cel~CmbOHvfmo_ZgbK`ev|^^ju*)Pzk;4skKSa0QqSz> zjUQ;(Spk6h4sC*!?kUO<@yAfUF*^lL)iC+dY?tr)PK^@O{XzpUoh_4Az#z`3Ty|sl ztj0re!5`k5EapZXLIGRqC{4HNoCUB)2bvtLW;de@82^qhB0^%V<>J{QL)Rc%Z=-P7 zSy%ozE{c2)UP?hV-p7&$jGn znk}2>>^pz%8|phA0i#CWyGjpewYk9t!tw_iE~0HpCbJTF^t7A#)2i??Xd@|y$)Q#; zk1NBMUvIxKaxNcG(<6E^4FbCUyI1+V@oo8)T@tRbaLq(g1fd?a?;}}rHTuUODh-5P 
z=gjWj(^Dwevp;Gd(B~ix<^()?DGjuyd^_ksc7E{=tFb+H=vAtsNH@9;7?9))GD26QtVjrSTv(R`A;E3Wb42agVO}(K<{1fa< z3Qh*cW?V(rAk+3G&c40G%<1t`<>7G-LW4-^jmrL5NmVp3HWy$GZtDi$8yuL>J|>l0 z!lxARIj=5V#EK)GCTeN3QL~vNN*VNP`zRKFcWBzvixZ*019p`Pf-mo=EBMYnz7VD8 z6SR;iZaXF|EBOr?L%DqCo{T+b}yiSUH{Ec(KD+}FRp^8Y{$+I=4r4Q&|8VV>^ozE9o$ z#ay((;*X-KFY0`ldvrZUYC1|pA-VWEy$5;Q*>u@;{m4zR-@sRBqeyu4l%-;Py1|=8 z7@RBgDvW3;?xJh$k3^6ig!O|x0hMnIBWh;;_06;H;bwlnM%3t=7s(!heqaoodNVJl z%N5$=#N6k**^UgbWItzi&o!&_Cw#)OR0Op{4j^&dE(XEzLBqowM~ghhjxair*LEyO zQB2_1NCmw}rym(7Ocy(SPIl4Nh^Eci$l_TDNz2b`u+PXPr*ezmT{C&rv;imf>^&=p zkcn6Np=(^#z5VPtPg;9Jw*K%4W{ti(ut<+*LraNYpSR0j~G8rFAI=;de18!#Ux z4-R#o%n9Gi!0gjhycEZdth|!33X>ojM){;H3o;<1$em|7QAFDVb=y&o2WMaw%BNv+v0E_$7QENp$}*N?&g=CK!{-!ixK*N^sfck2;%ft)7~ z8qTXzltcxD;8axNXYq>Gj~k1O>h>~?)H(TULKxwIJ`5z z0yQe{c?)D?LcP`!OEyeHN804zoVW^3$$2n+fj?jWmVMTyK;`JQT-Wi}lfRLRNxr#| z$(so7wC5)PB+Rrb%JcI4FbVTU^fndW04>c79jS%$OPP-6tyyHe+-Hr4F6+uctL^_=)f}oO$2nZr@pC7EX_TJ}x&pl_{d+vXCjJ?L#YYmvpIe*`H zo=-h)OKZG2Hih>2;D=;0q+#o@k8uAl1MAFagoN1BFXA8?B~P$Jq0DLZ8*fG ztPz}}G;|2waK$hB-r#~ub6;(ruZnd~U|scSi2b$?9>?%=4t7~3IvF(2wz);_(^`KL z9z_(4j!1i}`Y@a``Z^!Xa~Mfl_BiXK%Ka4L3baQn89NpHPYLxEs=B{?bKj%+amiqE z>8CK~3gMkb2q2N-x$19ssjPlX)t9D9i|Ts*K}Lfh1Bs^yf7s63JKB)w)I5BsB}ET2 zA=o>$L-xz3h~A`3#uxj)jp@r*-9+ni$xg`y;h;Gg5p6=fkjJwdHL6c=gKiZ>_m2+t zdu6&(TRs7h3#Ul+AxS(;#>DvX8r=d`Kj4-I&VCBUxK{YytU{uXgfi4V>x zzmV+JM5_p)Q5|GzxtZkpqIS&SzArPzepOD7FGrNJkw)mwubJvtlIDi~7VLlpQN3MSR?!NXfiu zbeLQUxXU6-jZ3{Fw7 zYjL=HC$HrD>-kpV(O);e^InZ>=sgmPW3c*Qdr?bzzg(5Y3IeCE2m7YX(%&(5t0Hz?lr+0hCBl06*aTd8s{N^G2=IZezhuBv@I$l}`k zi@r-iD29MJkIj*>PtlUg9Os7C^>{zL3p82y16;Fv7^cen9Fb*(-ZBx4C5%s3`bfm?{Hk4prrbf#*m_qBkHwEOAwaS ztF29E<6Ibj0M&>HmSu^D>z#!`Df>H_BZn$k#^_QiPft%g^^NEg5W+Hym{SWk z>}ypUDbOIK6}0uwT#qNc?WsxBjLq-r(5=dJi*h>WDWq|fI2w=KGUTUEmfWp<@!PH7 z+ph65D{-LAh<-H2(xMnn1;tee1aC~b-=^FQ^PttvpsXx-UL3;RM&rv>8W&yvvvx7U z7dTt$CNY8V+rxqKS!bcLBa@S!TJ?DM$|OW5*hH|Uh4tkT;g;u}(KTwE@Howx8ou%96Ei*HM*d#G7yPG!?pe^>4MF@yu>GTy?|h`HA(Zto#E|fEOD7RbZK4k zMGMatJ2HZ^KaDFn5W~``eM~E`j{kHVi#(q7!n8AI#?F)75B6*KB}B6hnr7bS9sT*5 z-iAlBWqU?oWyVWLb=u|>d3g$k3VJAAkmZk2rmD=C@hDSeSx6v93^aX#iO^R_7ErN^;v zqPDH(IJ)sWZ+1?EIn!&KT^z&-reu2YGUa#Iqs0#Ii5$)+bgBYES6jVamKWUY+>VH# zX}EI#R(xWX7tYMDw-RVa5v+A{5spbdQt{Z>!F3k;)*U>Swj_FCzf6s(S#`G}4k8k# zsnFJ>>@XD_l>48dZiQO|B2)L=@l$VxbUGE|wkc>{kTRs+j2J-T!LVZ=%K}h|wckre zF(fMs`5dmVa3P=0?6GvLfmLh3jdvsW%f%MMbbjEF3L%E>vBopwF zu~C&ieixDQk7(hY(&qtwkCD?2;u``blZWI+*Yvks&X&|{U#n3Lw7*OrP;fVTkmrpb zK@ExYpX;GY+flIjAeLj=TXtN6`+6X_VH)DQK zxJpax_AP@Y`=Zx1BbMP@EvLoT<^HV5Qnmtz_T`J0kvL! 
zv5^_YYaM6n*JcTmFOHyr?Ah%5)S+}?Mlhx6Cp9OQ+sB+WXi@;DxVuG>&EdyxG#&|9yE64D|-AN6@hdIhw-}**bhy%=Z-;gGcP^ z`tP03tlsP;sH<_4uV>(PcaK4zTcCF2XXz!On>m`-)N<$dSe~5_zzFQ_n~Ks}-MUGE z5ca~ccQ?8Z!*DuzG;905mNY%cY+t{8x=U7&&7k1m)Mil=!wD}I($X7`e7hP>d8iZSvlLf zO6EjZWPE;L0ZbEIpwiRO4Iku`oN9ht`gloD=F#1~@^Z?HsB6d3|h9_22!5U^vDKMl)fNAGYh9vJUDQxIe4Rbtd|Pgq23y zRw%}A;Sp*{r~K)oS3e?Xo|x+n)!3Qf<;pY>xl>GBut8(1B-&c=ig!gh-I?zQe5MzL zO?uS|5)AVS8<-;r(%Jn%jvS$*?>$8CiI-dTO^b0td@xV0UF{I2UKz8@Xa#mCIM+J5 z_}o8=c^n_Fg?n(76@7`uVmr=#wFh+b;=L5x_VI`ZJQ`XvVUL90bbrH7^PbDlDa|m6CSk#9zR}xH z$rOC?6^hfkw3|XkMM~G4J|LlQuq-KJE}4?>D~n{JStGw|G_7Rbh;M@R#PiWK!c_4M zP!u#2{(i@DnfdOGQ#M@Jg{MiRO+P6ATJIwzTV%;O>#wk^KnjDNI=`%i)@Q_wH zDD5de(hFujD)@n%QLQAStoa9V$*v~*`_P&7c$C+d)TsU$y0ohxpV_UF*?QYW;yERP z13$Xbj(Jj+SPhw&&c`!Ttbh$k$`N0({?GMS+S9NdB zIZj-Wc-J6%2PE-CLW+|x?#Njiy$~>*HJ|LK zaj9h^BWuck&X{Z3Aee6}{V_}*nf3-j z_an>GEAMHu6!I(hyb$1W>kP-OXOJMH;qM0Q;ZnA2rm*iDt<Y32U;Z|;4OZR^^gIHH;}&d^-Iw zn!noh=7Q|X!^E5EHO_c5^-e1;n}Vv|Ose~)YFY=?m;HP8-1(5hJrNeM3;*^$s~J^68qH}iuJMU{P}F>I>PFk zmnFF-{NB&3{{cYc4G*DTzRrrCs_LGdL&lX;gSL3?XOmPoyHwC}r!G5&^VKgr(^PAq zD4`FlV^H^-n21`vz_QRn){$A~LIe%5pXTdn5jOdK%h)Fw>j39oUia6n0eorF?rEzV zErJi+@2t$X*;@T^ty@(w;Y7=1j+6{HAGkeaj(I^}-$N!^^k7zO%5`QY>uX88tKona zoj4xZu)ff*prDt+7Hi`c0|_W=O-_lC8xKzTuxgal=XG{EM6%-LGROs8qYv;>yPW6P zR@014x*D0$Fwf(4BW;?;YH&z=AyzS?)WiFwOBKwPV&0ZW|0O(}yPaa=Iokcq5gjJ+ z4ZB~0C!sov}d^&#!;z*S0n@lI5N5 z7fIjcpm@F|w5x!z^90bE+xSYHgn(-kafCZPX`kLDkqcc=&ER3OcVB0DVOdlg_RCP_ zh3V}oncUP1i0r6R_JvGxk7;J>uo$4l)wxK~K%$=qp}Jnh_YOw8CFqrDZ$-}Us}~Un zD3ZErN+=cvqNivRlTa8E)=!sOGeXG%)Vr6+NV8m7WiEYYCXvjiB{RH+uH+orehR4V zMn5jSE{jSm{+$W2&oipAuQe@$S0Fq`XWauGC{GI0M!3jFCc^L}w9AeBCw%6V{h9fi z;`0-wmX}&oaOuy$bUT~4oV!1iNpIS6)ebOy-E9wmfXt-JUM;G@U+z_1OXPn#7oI*i zf0~O*z+es-55hWY$59vF=>i`2(hMO)RFi&ssl$jo2`!EmT(irhAkq;Y$GxdKBUe*Y z^qraJ@F?k1*NuqfVsos?&6h`ya)!Vhl9!2k&!ODPwAQ-jJw1y0Ojn2t4zK#Cy~{bt zb!Ta1u066JtS6wnD+l++PgR;`R#dyf(^b*++7Da}ITsJoZ-eB|S@W%#FIy;6pFbiT zF`Np|Di&}h5sX?dpg|`chW93fg?_^+zkTpKLNJOKTR_DXA9LNy2#Gr%9ebE+ZDzI{ zZ?_95sIJoDkG5eXt#JiJ%!IGTb8a=cj9h4Xf3sgArdrBDLn^h~hkOltJ)<$&JepYuK0ao5uggM=+di@Mq%zT1QW0NMz5nJ%5>CYn^g6^`{??)PFB zT?17d&2GG+Lg@V>h`Kc`QPr#hDM1zwp=z6X!l+J3m+#i)hdO$4`|pOv3ArDinN-r{ zHwLO%L+WXfez*JjxJgh-tOekx_CsPYRC9Re730!5v|t$&O-xNabnYDE3y9ozl!U*e zY>(?eNzaV^P28zz-E{%d>+Y%*qjHiQGSL!?J3v-lE{gQUput&xUiEhc1)g|?S ztx$}$@8Cd6&2?K06CU8jTIXNC&bgnp^YI2~?C*VFhg?MC`C$LieGJz`vDv#2Q0io1 z(yEK~?3ltOm!%Il{SM#jrG}_ZtTI*-zRk-l(b@T4tvgkhZ@adOkK`H`eqJfd5OFst zILPob8(>ihQ|{(0%Qn~+g1q%k7r}Xv((@&8#opByCsCI)&#_W{k)#vX+?%%Q!-*~B zNM)DQAY&mAG-x%V+c!s}188L`a^f?7*8hB|CKuezW=O+$P7`D2k)g}ED(n;;7#rfx zG~-m9BUW{w?N<ozug)EDN1 z$t4+dXNA3SrH7WP=pg@Ewj~!|uJTlgW@fY%7wMW=F7+12n}*M-KYMQCnrUTr%0b_; zS3rUO=t0s7htJ5QB&4QZ4E8uWLpcyL#i|#7qOR)y@#}{KLx8MXgis;|!FVc0Hk}Od zsI{5T+_uregnAxoUVfZxA&9tl3<@O*ngJEd+Q|pvEHshQfbSOoK{VV%5hka6GgiIhWCZ5&USHfQXdVz+T0ac8|?zh`-l_+_`=xjBwX4)fLrR$Vv@(tBmDLAA z^#Q?|EbAZ{fTYT#U2UmfR8wXsYwi$HLl4>0nf;8l% z%qQw>ySqHL{P%L5dbd~;GN1r;G6Jc%HLUjb4J?bn2%sLn5;|@FVXFf(T-i5^&lk2q znN|bF2)PvPE*g3B%QDfSIr8(Y@_z|I3b2Qq*Y!j}ZXpc?8~+g6@dGFVLfHS(Ehv#C z`{n)8_S^aBRz>EzjwuZ5#52q08*73_eswi3eOQ^lt5%%hN+*!eOp?(Fx_Ppl-RV30N^c@FocaN|!+}@i0X}A6`8(8kT;7aA1iDdsN7BNAlzw4t@e7H4uzd z)>Ueyg#k8tmRXPxkz|aAB0y>A8y-YK!;B5j?L3c2cqbh`mGF2MlzJvv7f;v*t!Q2l8NNsLY}U$ zNmIXcmw~#2RgW8?m7{rnf<%isY!qZ9TmxnAazRH;CAEQMS0dz`UT@IUrHH-m)2S2) z06h*?l+5%_a0V*jSzkCi>vNv@Mtr;G0qs;Ayzy(Tv{{J2qEdMdnh-rurq`n3piR9e zl3rRzG>EYdAJ{@JJaSOG-(7AQuuvzP|+dhxlqf(_7_T zzB)X|(46z8IdqQ{QpeTUymi=b?ph@b%oQ?Etf}kHIix2<*&mKO$d!^ga7wg<*AVv zW_(In{0I9Wa4Uxlb$#1k1HO~ys-6vf?DGdMl2%y_LE9@-x%_5z@@=qI33a-`^Ciux 
zzQ$Nuc46SttC;RhP@5@5Fi30S8uES0_Xs&N8}d_o8>AT(P}f+%7IT|#b2~y@!0B?E z2$wVd^u~e^)eq2|RDjh@o7V!RRt>85708>HJj zfci||jOGSxMPs?WKHYIJ>tLU6q;?iUdN0&&;(XbuNngH>94(2$ISq@gV+%=uaec!sSbJfv(PGU?||H{9K@# zgNq(9c~F((Xe<5Pqix4JUu=j&!}%(FG?`9tyIa5R)(IYz_=x6w>*3U`~19I*^@FfiH!8TW1k}ii0%7Y3kBG>*DzDZR zE~a**2*Kt}kf+9~YX^Pe0D0;SWZl9494cDxD_jTWR{R$iAV^a0_g#nDhp=*>h#@-e~$|Dh8un%PK(HV!rR5k|`Z{GvKB0 zDC7uANk2T?1Ch))Q2AA8G&-8mwKrq5=v|lVBlzLK`LmOER-y6?H~kwMUw z*}O#Qiwo6^^CjTHZwEC<5LbvFxFafl4#~+$_ihb>MMTy1krf4qXh;#UYs6|lBFJ__ z;z1Kfi?+tE!>sqhRrO?c&X<~Ky-(2Q7?w|ghrLS}am|V6JV{WX(zy+ID%YB{Wi)TJ zf(#P7tr#^;Z1(V};W>{wh4e$@Vw9eN?tbEB6a~=~Wbv1`9R6MCq$(@pJE{;JERWF-6xU<rN`}jdN&cD>Q>S2{!p@xcSZO8%j~xV8CgxYR`)bHP=J}zhRP5m#v1V)RsEuu5 zpT`@s0R#16;Vyd5Fd$yv+OIATv0sPzl~Xa!?B&9ii`Yk-o4qFe?5b)ue^%$%xCHHT zggA7$d_mXv*2EK-#vHdx8NS@e&oNUln(gXl%+8O$PgB}limRpD^IDR^v^JH7=SmBT zRwoH+1DUu8xy9b(v!%dTH4ddkeD7H({lEUTCM5;Tj{Oc`ADKY`y45XHIdV0kac~HhGBcD zSJC3}K<^wLrYk>&T)E>TMLLjMHms%z9_vptaW)o8_ee|!LNaJh?uHkLm~=7bMP5_I z>o9+CI#9g`NcmY~x0t<`ROrviTbS=J%PZZDFDA8gMVn|SUg8VwzJ5SW*#LEJmcAco zUOeV=IUC>qc)$|>;B$3ZDfOlVD3Fw)AagtV^m~~+hiPgD-x|XMMdkHph&o~yh|{F3h~;jaY#H+;X;-&odCnZ8!7-FA*K{^=cyFRV2S zdue=A^kc|RMQOt$7YD5j;XqppZY;khkzX2d8;xR^orA6TK{C%3Efy0|&v{k06sAgJ zLG>T$x>xoCXBW$*l_VqyYMnfkf;p_1%qXY>28ul=L221>5o`3f(&*y0`jojt;_D|D z@N#bqcSx9UbznNuOQ;(f3m!~L=RtC=z6lJ+u$xCR2~4+6lW-@KJnzZen6`gN_04|8 zJ?U=W%LseUfgd7vFCQq%dRa_da&U~<{&D(l`A>>XJK@4MDc(|g7HQydBYe`Y(L3JT z{@xRN*6;zgYJs^F>7W+ug>-Z*f5Gsj)+saYq1@cJ%Z=x6UO!Y5)|>6a>- zm>eu)_)rl==}|-=$G~~yysxpfo>AebmI+mPcg00YQZmKwEk|3J(o>}hYbEUuF!u_5 zr>N_L4H&wwMt=RcH|U>|TT=mZrx~qR`7dfb+~ImTF(_NYV2@~hNPa$>zkhBVeu?Ir zRW#1@73${`ZoQc;z+O`MDT+u9GX~6sY3OPa`I{nlz=Y$YD<;$HMp6Y`yVC^QFy2ZZ z=0(_<$sXEFp%KaoQA!ftv{yB%W-Vd3WjmW-dcEsRPo9W-TGzKm^x2X_5V}$ORguKmtYp}uLR(u#Td^6SnP{u=J2H@S_+HdJ?~xUIMjQw^==HU=0WOMHp~bU zY8-P!d*<&pEu7hU(2%Zln&iC9O}-EkygZo+^5sYp20EMHU%-4v&rKR%1XTgIR!l(i z7{S=(?@-cgWb9!}=;eZdr5B~A^%3Tl7gz;rOGwq+if<^?;Wcz(Q}xKV{hH?&-=dW+ zaNYz*w4`pTvTkIbWLI#}1vY9ZJh&YR4R4OfW@h>${;7CTg&Aw)9X77uniU zdyb)_y>^eNm)XG%Isfxu;~uU)Acy*AF&SeG(8OCm%_Sy}?*VC~XkQQog+L0wGMx*rC%#&?-!(_X+AxF`f2eP&~!QT%y%xzeLa^b z1TJeX83wVGn`o3Pp*x8X)JY+lU^xS#?7DC7rQ=)vB2@Y->RFBeNg!QV3^Z(dZBLCF z=_NNR)h|T`+ETJmTjqkN)z+KHl~6+^d;&mSfvAYzdZ~(NIHzyZ za`JlIePnnWZ~LFkk(pLLB@DSz+u}b+65+Pjv0nYxmX6dqWzBYxShfMuFT7do>fJ_I z**3LC`Ydo1X%_{3MYT`Z_ho-V93RDf=PrIbVvyYUGo|0n_GfEMm+H25LpMN%9an#O zGGhU|-&qO0bczqe^JB55u@!=ygdv~-ZOT}DrNKdcNh=6ATEnWl#2KP99*yH)L-e#T zzJEpThPZQLHMa2nr2wG7=FX*> zZnuvG*tO`(8B7EL8u08p3}Y+ozygP7UCGrJ5b8=!Dzl}h?a-DvoH;kRSOP0H&5yHu4t}a9VgQsqedQaWiHiqQLGTmag&A7$#PgI&Fcka&zQii z_GNKnUWp4$2YP6VrZx_Q$n01~lwGsN_N-(7gER%g##vD zsJ^VJRkaR=^s=e6_`b3xR(LWni%a@C`eW{;x85YE_J*dJ4wm=Fzuq*F zfZ91T3YfAWXqS(|u!wp8IB0dbu+9-U(?oe%ap4I#moS@|V zuA{2J??$TuljhE;TrEyRI+Dx9Nxq2W#LdJnS7>t83>P$7-s)*3=r^D!&AUi{Vf7}2 zicxJ{Y`~JB0Vn!b!fy)OE_Z@4hLVd<%7e=T8LtvvKVsR~dh-5p7>h|mlASaY!3GUI zQ`4w;hRYCL?+dHG=};Tc_)Zs6_FCbfJ7HE|2ekQFlm zENP!j+%bKD{>$DM@jtcq{WQ(^_1^m-axq$7!h~_8o=s@CstKMWp0nKwQSl870=xu+ zZ1;Vz5UVl_9Q5VEq7OpU)X_Ce0nm`KUGu7k@mftF`0d%8kNb@r7P8sgoDn-u$lV4K z@9&5)1v%Z}U|5WXq_PwT`qSH2a%|d3&Ns-gB`&p8K%N_25v?F0u``(D)zS7=^qlnvudKpkZ z z0t>i7AcFF*Ei88E4qtGA5?#D9DciXILEpJE1|%v_TynF`=+5ewe&K-?a!U_o6`#n_=4dsSb6fA=a7aqS~`pm};; ze8BtV{GLW(w+i`n*N~FFbIlQNx&H{-5S?3O#y!6;#KXMbmXMjX@Z<^?cS zGUp#?d(eTbONb`QpY56jb5x`7i_t}8OT8qz%SDckkxxeGZ1z~^_%X$0P7KtwDIYad zf_Hd{@&>nDD$^Tr2YI9*>W-#f+-nV)ch_Cg>8o8^SdG$i*nVD{3zvr=O6m5(k!|c16#$!&t#QCbep>U7&%0mVzQF&)s z-ap_FujTky+H?l;)5->Z#*i1TX6Wz|kzQ;4(PI|q_12L9 zIc#~>A;cu>UozX7Zjjl!-bv0pvwoeN3d#!yqz~(B+SumqiHY`akLu6HFl%7# 
z1fBcAIv;bd4d4{}8!EB1`jmL_E#*I5K$6T8ZO@lfidGqOdUju_`n``{?0C$VCh#(x z)5oI0W0agx#~jlrax@C67K>Hmx*)z**Ij9H%0cON36>8{bvMd&OoNReZ@FoYoS~HI z)|UP|RIHv90zSuk>fS6p82Hvv1Rj9(z9YIZr;DNQ$jpLpmxA?571`=zipi6TjZ=Ny_|J;%$H&j;nzEjj9`HIT0S|fg<^BXa zU@YfBNV;0ZxCd@V`G5JMypicfip;M3Jm^O9Hy#@-u?F302@vASt`Sb=K0=Z<%PUI# zL_|(hMeD7>(aNnJY$(ta+yEgLsG?T1!HOFi%MHmd7!wu@Sujw?pILr46qA;W? zU0^9*IlM#rjj&ge>&=Vu+hdVyBxOaNSI#^3<$N`IM# zNm~@449N;9-&h+CCe^q{cSmz4%FmCB&wya7>MF#uDVHn}Q;o|V?F|(Mf969Osu43t z3RACxubzK8YP$P1@%oY>kR{g_YYc)G^DKadxBmrhqDxI(gxoK`m3?>$4R>lT-(%$Z zqb~8fdOg1YA>O*0Bu0v;kf;JxZ}ZLaoc!Zz|J6tlEyZeu>Q)8t4`m!L{FW-><4Ky3 zp8dlU{w-nqZJ^<2UzBW(JNUxnbc8@h25Gkd{`S@}p!M(j*@u7m(^t8pXF-6x#V|fw zddhL0zfwhBH_#MuY5!dK)mLd-9jJn8?|T48&9mV{cJKKsVsOyVTx+sli^lH=>rIHm zP<`yM@Y3uy`5eFa?UfRJ0|;A)4jFpW2C)J}EdE=Xa?)}jzseDMmFZ^YJy0yB`k44D zxd7WlannOrcWm%>+DH9ZM~U}3(Uz#xvGt`3kDkxGUNS{<#Yf*DaelCEM7k03w&C9B z6}PeQwCF%Bx=5I<)>?IN^a1DMuIl!zIKi$2WMzC+|q?vB`59uTn+!9P*_{$P+V z?puw?ZG3Y+k-u!%X`&|nZ-e>2EZ6LY8A0P#Z#k14#SES|k3^v7$2*;fYR0^h{aScM zB1|5Z$Moec+DI-YD0$qtl%sT5+*)~qbTF+rVetWgt^G-b)E{O52e)q+%(zs>5q~R; z^VvvHarIZ{GIw|Pn38Vv{nK&aEwx_o)r-P`uX7w0ofWe1LY}c4+sLnOWgC+q#9@Es z6^4iZW|}-nYS1bbN}japZ&AsvxG|dPmg?cLF|0)TivF_e^X5X2$1~KvmrJfZgekNS zEO#DgvyX08^xPW1#K|i0Y0GD#xI7}*ELcFD-9tQyI_Z=1S!L(%iXNNk(0*75wq8|5 z4rww!s+huqxbETum;}oJOf#SCd&fJk@pyMx?w)w;_D`;qf6u^3F4GcUiDl`bA1Ux5 zM%

dC5AGeIi*HET3cPzlZ~rHy)BCIv}u|6h9p$rm2ReyujJ6@tsYZK*#&#Eg<39 zl#z+y_HPApo*AhnJ%5ic-PYsym_9b%s>R2(qHy=I4#EXeEOAl8L3%oQM{F(ZPVY`&{97b9KSohd6b0(r5uiO-;>!#k?w`Nw7V^Y7*Czb|df4x}Z zTfgxxr)gp+cM!TpjJqo@I+C@h3n7_G(aItVk)?UC8LMzEJaXzD(+jp z?e671!1l9k6*66#lT=J-AbL3-PH~|HFiiSx*slUFqRxJa0g!oAE{`YpdG$&BM z;}@~X+?D>1HiRo$Vm$ z6`fX+bGyP-fI*UuiA5w?g}l_M3r~a%V}AyuZJBAEZ>uyRbtB{A`gK7X^hY~JS!;Fr z8=qNvg)@qgG^(fGgq{|wnooyFu47To0+owTU=u_dx2#0+sDcgUd)Evn!c@GJ-y-Yei;(A3i2E5E(T739(q$e`UW) z-@o3HDJt^|+8&ELnC2b zcu*40%weqNpVzjyeX#$Fbc=FEUiUlEoS75|WbVR>l@pQOdpS6wxaYcv zd+8B(2KW?5hMIGV4&uBE4tRfkeED@iO&*HA!g$Df8KyhV%=jntBqNV_D~A7t+WWS_ zZ)&MhQ>#6x)%4k>pEvt{OZV*wd!kVdd#YtVxj`!xj$^bLHDGWqltieujIm7V272xD zyAjPPV`GEdEu61F7m=pk45@XQpQ7WvUOVy5q+x&cvbOU%4?PtT`P!aW zG#2%UdwAWWSOYq<-$>$5<#RwYjExvYO<)d>2CwsQz5?6NSCsQEp@u^>nibw&8idYy zxBR|HYkVf%6WpG-2X(aLAkFI6j19nE95nDEFP4gy;Y!ayzbN!MB4{1h`%d5nGSV2n ziy?7A#3u3@KTz$T84TV-Xu7zc) z8j@5svooChzFr}`-$i)gK^NK=!ZB|0(Ve4buI{_Ij-LVj3Yjzvg>vlgP@|5OSLF5LhIcHY8AJ;#toFJ5F#V;OhM3eM8A zGs4(9;BcsWO*+`c!}1v-L{{fO>!S-M>`;lxm-ZW3p)wZZ*VVIhL38Evl@bvY{xrYW zGi7FRRQTiNCcgr|k^Y@!``7Yw0D2f5{N5(AC$~Ih6Z?q@bG=T#C3$eilBN3iBNg3e z7N3FcPF-fB16$*}Xw}vdZxgK8DkL7rv##@$#>03+sdZGcg>3}Y%W*FdhilZvm01UV zPTklAWHNGlL`M;E?pHvr;Qd~aNi(JDmX*p%(4$;fDV-uQ(v>^#*$o@Ky;a17)Pky> z(D1s!8Azoc=|p32BCk?B{YUoQTJ?O^@3F(t!%ev`oFs%KWt|B7RjCQI9^SFN6^V!3 zz@5v{pyPt<8l@kNUP#x=4pj>Ucb+j4oWp^zWv-i|M$f1y`4ah|&y05ZJDTJ_gRuZ_ z*N0t3QNl-v4A5mc;RsVFjaWBJu!jm#>;HQ8%p=Im7gfmT`mC1m7}Qar9I)D>3}_+% zV}A|D;6VXQtcdu&hR@5(JD+%`QdQm}Sbks`&W@A7B*+^2f0nl+Z)*C;&E1{oGaUq_ z`)}&!|0d6usp7w$rh*8V?sov>RCS+(p%Q|)OLk>NjRJr!WDY}9AhDD(sVk|>)$N?e zsMxUeN&W`>mNf;$Cs@c(#sj0<`!C&+x0R*|#D<5$FdBqmVS42gb$pq<9t(RkduXRS zJs)>|o)}*Jh=tiRju3rG^a{dPxSU2*O5Hgn$6^&#)krr_Phthg4x>;@B~bHzpnjZ% zDQxkdr&1%kd;ViS_#(xSQ%;47^4_tsB|jRrUu31(c8jAj9A$3*pZ!wnNvCUFo0@ll zo*{tKCh1yZch=>IU-0?mk23&c*CEje0(z8yo9Xd#u0Xgz=C~8#Xh{>^YY znI+3tsQxE{1r&PlqX{ZDNQfh_iSrB0_@jkCWKRf~WP}EAx#>Gm@*BKyHh(Y7Q2tPK z37=tgui-WMI@;kv>%qsh{}kGz^ZmMAPTB1^yAsrzttky!mJOeMdWYx&zZlRC2F zwKwur4&^NC9@zlw(P9StXy7E@Dy)^cOJUG?O`y?q(bKcD-x|vcJl5JLOq55_*e-yX z4zvILN&Y>Z#Tm4USlUP=7`usV6}+ioy^|f{S$>n7!|FItigMt8GZ_k=E6kNbGW5#B z1nqkk)hKQoDT8uA1iEiD$^Xk#s+STl0qAX_Jjz+ZfV~Pr$e5!$AlsqG7v1oi7oA7jXk6H%4c!t`SJJ3bM8;z#j%{Qik3ueGk|hh?k3^R?!5J zY;nLJJ*QrN99D|Pf=75nh8RTbjL2Pt;2y+4IcrbKnk6O}yh9f#b<3B{1 zRR3dA8;tj(`c3=yVfvqEzgjS60A5|JJOXW`>T@t1XRO8HYh{b;6Q2|1c`Jh-PM6x{ zfxJ0LghO^)!k}gNAH;|n^>@Si0>iB;>8~g`Y9VMU(9|Xo?_Gx~F%r#>X;NHxa#a{~ z$;bafg|D{%V(`$CD#iK&5eRXOfxf$^So9DMMDqD2R^ZRnMorcoA%986KJ9q3sv}#s zuqznZx_ZGUd#1YWRF!4;aoi9J^Y~o*FF}g`U3S|`k+<$MyTwg-!;oB8p#66>^1r%~ zAhw|REtiopQnU{#{5V-P5XlKLlmB6#z{4yYS4MWm&C3wdL(kbrXqK9Q*-lup=Jn?RFHdm{mx)N@oj626|YUI**3A@4F?sMY`YzJbXT@bi zy_(X(!2_LbJbs;A3X7vj0oO?uV6^iIQSq;|PjPq_^b{wV%aFnT_6D!H2=a|>JPf@Y z3FBMG*pd)48#8p6LHjW>wvl4@?}@=*xd-(dMWuMOHe40Cu=hS0zb1rJ2A>UTE*65O z6Uqwq?7ZB`7m8#O|GfsAu3f+W>%|VrdA8~S79v4OfftZ4=BZMK3tk2pT#tM1qD+ygZ?7(cJ&pzy9~3oveDw$U$ZNIOszIl6E5Uzw=9w^trNP0Kq8*v7lT3VPS%Q zv083XW*7%^xkEEF$uEXsR#*r;OsvODNslO0*&w?=4~xqFX(P~8uyIsQt{o^u7LnE9 z{?BU2Uii-Nk{nAt*mfj0Rgawsxqan{3Et+)SQ4KVx~!cc6yYu~k#MOI*%z40eC9Ha}@5{t7>0) z6>dXOf_nKM37<$tXp<-D+Z^Ox8HGFg(Mg6W{F?`SaWu_q(zQr3B>ht)%Opm>jirMo z)St_>9EuX+S3_O!hcl7BQ~Pm<3A{#r0aED}_s!^iHqm=}yrel!vLpZ3YZFP%hHO;B z#Mf};Mw;~eZ&qOcZ8j$`9{-Ex=6`aPAfcux+j|h=zVTXa0N}94fxbhTh*cr&@2tl0 z6i&lk*=vuv%p~E_LAp0&bru)?U5CDhI<#%+WPc4rADRtiM~`jp)?QQ>rE|; z9siBF&dSEdOTA4F6Y!tZU`i}R@gC}HJyJtLKf|RN0+!eLckVIF(vr`@dfJu4x^m?T zBI;>HTBcuC5WN47m|Ak_l$}ehzNrf-u3rD6S$gy?tNNS5ih6Vg%ABP~vEE*ny7fFui1w(G5{jq^Fff6*0$l^o57r z3adOe9mY~L 
z-c%OTFV$E(?pcwD8+FxM`-X?e5U0M57P|G-K)o%b#*l40G5_N8gV8YOu{rRAAp7F4 zGOYB*uiv*j5;8)sC4`wc?e=9qAVbz&cn5{9^!Yck4o%L8(0japT5f6qf!7 zh;tA1sM%KdqY>_xDgMYp zJbBv|A>UUkul2z917AB*#QvrkT}++M=30$Vd+bW(Fo>S7R@8qRj#H@{nc4Nz#tq*3cKP&YgW?@BJgPucBE+q+w12tDta9?Fvr@yZa=jg!qKorL^}BJ6u42?j@u z`n`H-?vfixGnA@xx5h@f&bD!Q;y_es$z#GiY=E=|&rW?=p(+@>+T_YFhgT;=6<=B}M{{YPytx!Xzb3gr3Ptgyc?Jn2B3#1fv$_Tl8_ok9f-1KT zHXpUe8eh?^_)PPTmTGn*PTpc4qDBoTz&WINLjr3^tXiKD-m)|WL=O+4eQUFh37xyS zf491S-!xV=hQqB@r*T&VffYcqs(mkkp84av{3e+9Ae%XpMb=Buspqx z0R{5%qeMhk&5)UlUWA``pS&N%LEBROl*d?~v?jd{8h?F{_P}8e@A32K+shs9ByEjp z$N@C0g~JNh%}NBk&Hym`NGdaOVg=d08+#^BT;E4mxv`?6Dp-F&XzG$2b^Qsr7c#Z> z5Qqj&bWBY3gYy+DNGstDMbuwiYm&8@-$in_LW<%(x~jPq^q{jLar0HBaMh0l)9Y|E zeQZ+RLyqkHAMm&N3DAW`CfO?=quik94{%epv8Vb6^Dk$`o5sc&cX z`6Nf)c^l+SoTg%gF9==+BEeU#=97#rYA<~Ic?-Fp5wQC>J)hXk)A&Nfbsq+Hz4^@C zCeL$k+k$XF`Ch}Wm9Cd^e&#wEiFJThhu_>;!FRWb3I2M)LY(3){=%C-Tl}Ct^|k># z=kF|j$n&Oru_iy+%pmX!P2Qn40(l_RREl_N$FCE*GiCo-PZ(JP8wsH)hY^uWH@V57 z1g?bo1wK`f>M@v8{QS9Prwa3~G(#%GYjiXA7i&v|`?Y-WwyhaQ0E;dU2H{o!-6Y5p zOR|I(ay~|lZ2nG8pXZ&n|3@H+K?mO%iU2;djUY6QyI31KY{X;_NLugPI2RaS=-9Dj zOL$vafCFq&(W%q4XWqbdrexfCTyj88`-$K4B0}4GyEvXazrS0-FnOQQMfoUgE8OOf z>?XJ4D|PU!|H%V~Qhj`4q6A!7iaom|FZHe<8~y)c?#<(|?7p|*%jJ@(GE*{V$ShID zkd!$YGb>X_M49PANr}v3hC)K-A@f)nqs*C;A+y_XA=A4~b${>s_dMU{eLv5C?>~J$ zD(AWPKKI^htz#YQShm+dOyv}8g~O@-?lf>5^BSL>Q2EXTPyXKdf>mej2ViN_RCON% zKtPR$dvZB5d%EFwQydY}zo)_8@z>_@TmRdyZUCnw3wq3{F9M_gd%T-?7Ci#!Ah0F) z4}a0$zAVOV=Xa{$5V(1c6J~&9H0mi*+X@=vgn@{*za>%rUNqi^CN?63#$}rtd|nfZ zS=c7iLy@BpN{4t5;R7m&(0DNc3}^o|Yyd=N8(sp`s}CwA8gHLt27J!>-4e-z_8c?+ z5iUor56vh4cGBTI{b?N13M}qWPPIXgX~V5sD6C#aS{XT|`)X z@PF)nTL%X{QaS-`=onz~HXYn4&TiUl-Z<1?9^-(IJ-t*62t!H{8@zswj9nI;i=1Pl z`Dqm|7`TYs;imoFGvM(Cs3&KGhUdGM;tz9&XzUN9w}=~~1{&F{p?`|VDFuSl%C zzN~Bn_d5DGbC@iEk`TY*=e3;T%LZrtix3(}LPw=Z4?XMahn{r{Z`Rq}bZRpUw44Qsjf_fwM4ySM%>L#5k~FH25}AYs6=HRv zF*(UaDvZEA%?MB|u=dbHL$H$~Dn{}U11aO@+yH&t66I2zgJH-WuNGPDm|##5y`{G|hzatc1t`{{AcBki;bfme1A-r1zCS-T>G#aaG;RR6+OT&v%O0b(!}8WTmqDwdqFy-*eAQ9j72)pvZ-jp9_Tnom)#CxQZ)C6Gp;2M zoU^&0@dDYIx4M0~)I1}hQwL&?8SuFGUO8>7Y=ij4@j;Lw_}dn*HWW&RUQsZ*JQ8}l z{T?LwY#}vC(o~GS4nmSKBJ|5p$8jA%pQwhj{C8@$r{_S2S%ddo{v`uyicLcUgC6MR z6=Y$#!{2ayGbpLll*p@BM=vuqM1n^LXY4wnqn3Qp7+e;o%zNxo`HP7EBXa2fD5ex( zgtE~xD{SM0Ze#wD`=QGWDlJzDO}Hh(tbufiT?vO_%gVNrwk^rhf1gF2WuJS9@lU#5q3Dy5LOO^3u}zuRX+E zFH=6fU^Xp_cDqEFWNMIoWycsHWrL&>3C$2t1sZXl&^U;$WbA9|Ro(%?mip4#`{PP| z?3Rb;`4@R;SFMmpT8rIe^!+sOi46a?sb==?N=r*M1ZERimd9;<)2i|pj;DQBBurwx ze((d@pk=uLG4fJ9(&>BeqpD>2u@kb82sp zhV6OAoBK9ez&Hy^g!n}74wtQbQ|8S{TVZNEmkBd^OZKLE&6`grA@Wct!SYtek&y4cKY|kIx_-bb8=PR3?qAD8ez&1A?+l@Eo%)qme?x>?6#1Q-pFQ0BdsaZ1H`-An1|Mp%-ru%ZT#5Y0{s!x&7FM zWhBWM*pR*^R06umFe$Mwr$1lP^)Swnl`Oq=Z#~|*9)#76#I?l~- z6}ZxFK1en3l8RJI#9mZ$vM}O))P-}X_(7wBN6f?`0}r!vqLG9`-CGPS095o6J^Nb4 z4ro=3ZDys0`<(0`f=sb3D6eu>V2k?0+o0Y7Z3Oc`gsW@Sw zg-d|?MajFFYi}cRv`=HrJ3B~^*WSkZfBo>a+rc0DXdhXd=_DIx>6(8^lQbKq@-=ND z7Y4meLGi0YWoHCbwn^=^9jvbRaBc*CW#g(Ibuar_-V+3DRk}I8lHCpszG;OCiynG;XXLw$kc zhaqT9VcvN*%OS5CN3!12Gf)H-gHnAY<0sC!|G0f+@LLG+j`%3YlwXqW$;+0^;!YN~ z3Xy{FD2Eu!WEmhVJ)PwkvDndhRV03GvEM~=83Z1DO?J-S3EyHqanZ}|`p~BX7bZyV zxqo4s2kXER_Q&}yA^tx&-|kM>6Auveo5$)Gue{s?HthKq4XGxsSt|^wxP|NYW0mbB z%Y||pjwAIKVtc(Isji>-F2{jc8SOJ>fTBXE@#>1(?ci$ruQN(v%oilR9G$PJ3Ep+H^YB%s_c}4CpT(2U|u7 z?Ks)5+m%G<|Mm}%YMMhYMWaKOjw zgh2)Qr0lHxI3 zem9OSN*)2htjE@|N!EjykQ*7tPcOV?es|rv2?}TDN!$lQ&R3^eeD_)Zz@!v89Vj2a zw6tV~)B*62b%^(z<$np4g$jS!>n}Pds2G^kj9Kq|Hr-K3kc0~Cb0s&QhkTX?X`RFN zSfEcqd8VH^{03LDW%QSa=fxV&ABKe{6t_#N4I1JZBkrm(o~?rhT2bPz{yJXoL!j|ky3U9bu|K{S6~oj zT{&Z7DJfTR^|vb`_j-tlXOc%1E-+XI&er3arNo3kuYb6GQ8PDUA(Hx=O3Sx*i~U0N 
zZwExK7FSzQrKTC*uFn@S*muAi|0a_4kZB4W;*FD=m!c~g8$x~SpbR_n zpH`O&)V_%F%J&Fr0|SHbyS&x-Z@ScjK3F5>+Q2eOhcelwNofKMim565ALyb^QF4E_ zYakUNogj@Hkw$scy^MQA&-3GL=!-caesxv=rZB@i62 zAPi%?+9%pY5>%jdQI0uKhY#HdRdKFwoH(aDs?QTgDQGLAg^ZQ$6 zvY=xjaN^|yjq) z|KZX6s_UT`x6Ko)ZRiaedHi;OyBBx@I>1?Ozq#C3JQmR)p(4Knr1ST|(b2T&o*CTI z#40O$RUqr1O)5|(i?mc!eC6+D`yr6xYj@R7mDOkZ-WHfIUsl=nFu_*LK(x1o0 zl@Dg@G#-iU?#&w*U-tHP^Enrw)LA)Q#%qUmqv^rmRgvT?K9l`~nXGpP3W;211Ugb6 zG))x6R!SKgx!S@e$3xtQ6xG&SOBXzfhw>8>A=S-0S`~T*jz;?@g)IUH;ks_@R5~?M z>8e*@ZREHG5;hQNsgaZ-S4p~O9Y-SVLcesd_E4$)T353>6f1TNWq4W;_y(P~ADmo# z;q#?vkw^OZg#ms$;w^<3d@?DGxbvCT!^hT`)Mgs32bT{Box8vIsFa9H*FUZR8KFjt zUwaX@uv%m?I~8)M&i?q*W6&_W61#gfPpZ3ZLZ$8#*V=7VXGWE0gdU^26}J|awb=4f z*iql3B$q3nS1Pkah%^g*DC&^lz0u}k5Ixlv^Q2LKL-2dmBJf2%&~`Hw^2l#kbtW+; zs0Y+t1n96FQM}Axu4xp(Vd$M|c9Q}PEhd7X$dI+U8!Yq*&Zno&yTM)YsOfWns@GWv zN7v38wfR)O6ZYyeS!d34Z;IhJv|`5Hg*eIP{d8O9C3Xw3pv8WT=F5CIVd&~=_U3k_ zecKLaccIXu^ZcSp;^eX@Jj@+D)1)U0Qahz;WK|@Wa@K(<5R^t_p<4I;;9fjP9)u@Z z*ce^`GPFf(V)f6uc!M&>SA(}xgBM2XOZS{d7$QeKgzsMqz|Lc1;?LU~h+%ZPn#nIZ zlbu9o_+#ChHausvJCmgS*B*=u{@k|qsB764PSf$Y(@NRj*s(-&#-=^r>|6BCtgCg4 zRsK)c!OUflrE<4P+x(9BJ{+4q(DX)ClSBT_hUZ@mop;K(LQ8PGqA6UDuY;3dE0m^W zQq3}U3aQvMnL0`@qBuM5F1OrUb(zUKg)r%Al!7jWEXtRYxw zCUd+!nRxVB=%}`6@7=NTnf2{g^&AO=xnR={OslAEI5gy7x1rW2^J#^5(RIB(}jQ z6M`G;3M}R@lm=xx)V27LmG6y>SnN?d{+aKlR-ub5&kiM zwuwP&eb*PCaGD8(_NttJENR^_H9}nz*a8=B3b^{F@=Lt(l={2m%?3M2&qQhOahv0!Dxu@0Sd;1Q@VA;nlV;jJI``-p`e>B>w+Ru0JL zhRQ|48R^m(y`YG->Ui^OFq^G`?9@gwVvw>%3w28@{SFyt#YkX3wXtxe+f|GKv?_bicEN(a0v`1Qmz<;JuLgk_P!r8FcZZu}) zWiFF&3=4~(W3-Zj1zRYzP2y*zsibh!sw*BtI$a?;V{)9czB5+FVZ0b}#SE+i-r2F$ zgbv*wx|u}dIvUKF6`ndaZZwe?zbvYAc8=AV7)^;fDQX)zH&XdNT*g+Pd_@U>70FJ_ z%AI4S$R7PgvPDb2#$UUUBb_S%W8k?>_+K8viMoUm#0wlzCym9~S%;&Na_Y~+2h%Xc zL*Lrin3~(5W5MeAp8b8}qR{H34`y}>wkthHT!FnsGC zJuJ$^+q>Ljj{pX4rux4xgBh9yvqFg-SS$trB)kQRRI|Yo&cee#B9>?^`~;qnri47h zt|&cS0U<4wAxhlyo<+}15|Nd?_W$`v^+btG5WbO2H2wWJwZq3zkNx`9+DrV;phD03 za;QYVh3gZO9{oh5pAPMo+fTk7L?whrj&{2zvEXbBOWMMPC)IBkw9AT>r6=&(@f>+C z0ZVc~fBK(w0ZZhy{n)aiNYJQE}ZJdi2Uigz8CmoP^2;0R`AxsqGU1qQIw-x}oY#oI!^myWZD%U*G_^@neRccak zLpSOe*v)*3|6Ii}rZ;f-X_;(jmMeZG^RPLL@>o~(RO|M)RZw}c)J=yM`Q^Hp8B#67 znyLQDp$t*j0k%Q^oPynkLS_~<8j8Ael<$mTkIZ-+3#+|_#&w#a-9Dy?1t6!M1zjU$ z8auekU&}dmF$Wf2!|=}zJV~`OMHjt?vdIcWC54$SR-lkI4&0@o8lQ1E%NW9nTx6FC zyxeO7rxQ9isE|F#{~xRGwN225Z}ExVs`Z%)O3;!fzF=Zy?SeUtIvh!quP({fs@K=n zTTUJX$gZzr`@N*HbxL?lEQ#-Eg>~Lw;~w{^tCMLO`rF44SkO9ddmw^|NULUxDBQeL zaM%Sng9(4mU@30mmOF9=dH-{KxZw=8*s^L_l6>)Ld&CykjfUp-8b|@hsv?}uEH-B% zmQZ25G4!pPsSB2$KekMe*JY;|z(cSKniz6NO)M%fqW999R_c_!XkrE73^*#{#?lsW z0c!G%8_M1RKtuT!d115N z#04v2dVDel`}I<};Ue0N zSZD#x%T${nADTr(H<{pe?Ztj3_Y3fmpJVJWuw{Kkh__(B7l|W#^~FCg-D`*!cYZSm zXw@g%rxS>g{7kn6*??ZqU2h;CoYo@{M;sUOx39oR$&2!NGzuud9b*e;#w?85_c`$v zj&vS8CJo3jx8C5tZUiq&*+ysU=4eWL;@GkMyICxRVC3McuaP$0iZ;#Qm0@*;JI-vv zg{m${>n1arC=lyVn?E`8&1J|wL*o8VS%O1bEuvxetmuJli zf9=ImV#L{{|7Y9pJm!$;obE`>4uNvZ=e^V)6=2lyyh1iz><>VxfgrAV%IWjxqpnwx z$vEmF2F#jOEAlKnMw|0rk0Icd>AdoSDF^a@p#|};%W6oP@7%*ba&$#>XHxP9+GU4P z#-~c+uMbrC6lk}T4kxIY;dy7+#)sAv!d^SRy@DJoEc@R#Roz@~cGT|k(2q}8{I}iD zTX!(@me}<_@1^~?aXkh4eae+0JvKRtz812KM_@;MrmO|^o4RIy=Y4KNUm^?-J) z-$e7ncV#tTT&e@;lVxDUu?djgczkd+2NjYAe)3B!Jp2An4N<7gn_0aYK%P&)Zg#=8 z=Zt4eJoMe8t6!~BU1N=Hfnby6>#yV{0JUr$^2+u6WTdU*ESRvFS zaAb$Io55_kPa8#rzQXvHjXzm;ib64wqH{?(4AJ_1lFacYi#_i!_j5 zzwmM}bdY(A-u7(b_rtt$7kZYd*!G1ImzjEL*YQB$sr^e7CoJ=gu!&X^TouUoelBW?jZ0O0i&-tK`Y88FujjT%obO z@zp%7ju{wX`BeV0sajn4TzAdgVV434MTuzb*(;tlAd_oD_K)Qb@rVpNL;TooDeyF8Y_DQiZ~))7z+7tl;pQ>fx|vF*1*OHCUtcSfgBv#8A+^>%jDX6;mAA)JUovyN*`u|K zh+PNt+%u&d+z?z*Ki$qn68b-cQL%7gNn+r;A>C9*gqjwhm~ 
z8i32r{6GMabEUc_2~rccI%feOnGpZDkNa}&$ET!CfKO*exXx2Z8uEwk>KSDVCf1O` zvZ|Tda8sa(wAn)6piGK~bB<}VORrp6Hh(C9dm`V}_@Jw?(0k;kO9|Q5*ZIh`a{4CQ zR_$P8O;nn8dHCGxfqkbTe;=6}Zl2;9nxsmm!R{i}u9Nf;3fc*W+8=j$F%`s^O-7Tf|awx&Bm90R;jUneH_pV{V(zQiqCaT_g zbs)^^%8l#Vbjp*HICRy0ShPTZ_83XV!yqx!W@VqGmNeR5uZrh{bv*BO1&s$;rp0@f zdogp{4Yq2lxr4MBm3HNEzqiWP(IrC;shpHy?dRuA1jep1gwItDY$mo@D)YRE6t#I; zYRXxl7=B9ziJ!3iFa|d$aqSr_v0G%X$3YO5<1*&G>9<$-1LTRdd3E2N5$nq}t~5Kp z?uYNusWC-1I)L6pLb1xIJo2R`4 zm$5Rz>sm8oL923^P9s6WZIxQ=u7GHfr?O2p@kF}mQ_2b+@_rxBjk)v?=tnhPpLv6& zyH_|&@X+#pBHF@veLWF%XY4U5N#|hoY9!__i>IElXEy5W4#H?%YaZ~Pr;uf_gc#J1 ze`Af$vRD3|6ixm^!6Ah&@{}|OMRq~?i?3xhcNoQ;(h%h#KfawW=0ggGtB>FlSjd4b z3*zfhDr!?i32Nd~)SI8ReERI%J0vMqi`T+6yS+?O2gY$sf1ixIQq-GFW z^_qM*JNr)PM%_SGIK$H%LRJ+HAqmJ>VaUg?(7 z{9_y+;yNBPv>Qaa*NrP(9*D$Pkoq3b2~pw2O0PG7hwV)s2q)hf?RLda6p79rBEBUQ z(;)>^c+aJk^Y=A}>viDvKPwv1Ln_E=Q!jvaBVb>E8>|U4X*~?x!TnhSL2Jd4&((s^ z*Uh(V>~j^V;OMjJCE^byrY#(JQk!$n7{JVV5O|S}r@!YsqhA8HiE=V)_VLvf0lbJ2 zu)&eoy|v(ELv@{Faonuriyw=jbSMiN(Z+}cH4>GvlH1pY^P=7E_8cn*V{?ayysqtv zbVhk3JjV9I>_cU$aKcJ=a)CFEXJPRaSbmYWbPDL%26#nM=KFVY@-OxyaZcUc+C+&5 znut3o-8M-Ex0_-Vm6?rx@`nWRr1Webdja?mjUgJh%q;EJ*G0Q2%$0iZmDI06GLgv6wZNk8|Cslc)a5c8T&{R9-vjiJddZb6^GQB9X)Nc^ ze?GgM`;;pYDLkX~HvUJC?-W(M_@V4X7P8>^AK{5(lQ_Ks#jx4X;~2Beik(p{2@Mk_ z=zFK13Xi;k%2O2#M`A2}nw6<-w(O1W=T200yz<7cM<_o884`_u%*-eAT>x*Iw2!I5 z()KAT5Q4NI2+~Y#pLu;EZcmidTLkG#TgbR8`*0Q=&XR?k3j$dMqtS1A%&=6J6`wX5 zPP^%GGPRI5V?2p+#i3%9@oFl@CC_V@fLSDDXn=iYvX#Eiw75OfCV z(nfUiB7qO5u92V=JZl1hLE&44kbpya`}xT;Y?!D0LurScU_pv_^}yM5|X zGyn-0yO;x*wbP;qmjifMj_*BTfwF&llFwlp)>(efi!Z@a@vexK%)%nZn-EyQXBOu9 zeN3&2^+k&Z;5Wxh;Wr^{KJ-EVuYPkf+Qf~J!4Vt&#Ecd((HCG%YI;v>2SYGRm(g(1 z6!zq{rq(1CTw9#7vV8WU@ztvkCgyjf(06ux8~7{)Q9)%(0~u2BQ8of8|9_1frr|^-kpykxkdbIq zk~z&K7x)g>;MG2Tr^<0Evioj~qPE&5Z=B1GMy)ThjAy~Ym3%OF8P(bQjLj6Wx~GwV zPOKq%?hasG4>mN#Fyyx*A(AsYd1>fvbiKDNw;7>q`vc(_r~C|sen7plNXG|G@cAPG#uxA;QMc}1LM(-gF%~xu zK#B~qVfewP6CCbRvA#<*o?c|aO@k&90Z`I3A40`p*)n4_3=y{HQCN_O5*9ba?shi_ z7SD|bUiLVn-NZ5hm893JV1>B-gK!gP2cOond56La{p8{Neh^>_yZM%0WP>QfeIK|T zJ*GVGza9mjGkf^?Ib=gD10>;Y`Z$gcen(Z;)0RYSM@>yflN^vy?cYJwMzOwIls($b zM29X82}EsA%cAnmhm_Q`LVwPl-R-rgchDNmxui{?@-5_I^brwc%Mn`u3+6z?01;8q zsrKB|g`aunrA#R)Q1>d|7!3KS%|fj34)Pc|$E8+ei+E}51|4{^!Y;wrXhLH?Naacu zGQosWS&>WUBxS6`gRJm8NQPtsaxQVETuQr8L~k6rKk7ieekP)%FLwpRn9`9h86|+~ zQ!^B~8bMvYEr@I31WJGp_!j9mp9od=?mOOlvZxQAOLUuPU z`%Qnci;hKgI@8a9PRfvhT%J(Sgw;E(D9|keTS_f;<`c5_eh|1}?^L;GN?wHcc@E@# z_@h*nLM^~k3s$4k%X4V}6&Z!Zd!J6?1O(szXl>yu{+!banKJ~4d?o*cr~Jx&B~Mh| zpM7LohsFE)dhH})fKt@qo|?n7S)SsAg}&J{Oay;>y9U0`bibSkAs`Xj=d3%9x&qxt z5im%Fz2$@bJP+u>)vyO-zC=WVwBFG`37QJ*Je~y>w_pj#=wxRk`$(J!xP832+{rkZ zZZOZYq(2@f2|@M)(7*gh%d?1A1!yYwNuKuK7?BvvHMnS6eFH8YJ!ZXJ*v%)YLX+5` zzKK|wU+#z=9hzmSw>^;JbPuFX(>>OIz5=h6sicX?^HH1^usByQwpEfM`YtPFcNp9! 
z!{(p^9SIJM+&x=pqL5`jRBHPyki7j+nyt?)~lv6Fx-uzsaC+_Sn*8vuL=g^ zg+em=d|opj#jxVdo; zZbZrFFam`yzl8pE)1TmQ6GWjAG)4WO?s$!XduBzF4^X_GY_GG{eiXV3l(UyU&4OoM zB$bP9LUT02j3np}C(c;Lqv#B3;!_uaEC*geG=E#7Q!6*Y><7hbY+;pR< zbZ2DQht5;5{1bFWdD$&*<_ruw)#2HvI|!MoN^16-0tTfsQB$VM$R{7Oc!Kq*Yj{%q1X+KhdK&Imt zneTm|xEYUh!Z;u7Ao<`$f4*wX)W~EEczXK}6LF{{?pdIQC7s4vi`+O2UfIF-@d}H4 zi0e0jp)IPFvq2Q1Q5R#H??OgSWwBaP@hSKe-Ma0$DIq&)3z3W`X-7U*faXr4>lWk$Gov7nN?_#C^|b2#n~aeE^suV$H4 zu!>wQiI)q@)fo8s0TZ;&@Uz=0JADjK>PONP&hBWt1TS6{G{So_7V&1`-S5pCh@kNl zpD!fOL-3jBQ)&cvnJ~M?9NitG@pg>P`ejUkCP->JZiQNR#czOEKBZ^K(HRk&*LE%P zMi*}hPJUH)mkHcUssz3ZZ+kUne8C*T7@`+BM&-ht%A;Cb(hILmp@_uYq(TN7Dt#AK z2Y%p6Y|@nIF!PAmY+6La{jN1Zo3B((lat>~IzPm9pKA@6Q8ZQ-ABAh9N^21{dhEh* zsB61dzUMe(GkBKX6%G5OmdK(6c^a(Y+C}+#ko;`WsEhd3Y`YU&s(^O8PI2*Y>-K1}{9<0htk`C@mC&<( zxpMb4v+fP#R>aJm$v3MU>VPAh`8*?ATjS^CWa`9@;?k>wm4!QcWh~4E04TdVEsF`-{Z0T^0ukDmN<6L>>3K z8j&_p>W`elBu^!StD!o3GxNJOaX1nweV<7S22hz-+KWxh z3$Xf3&|@MP@^H)QsvZefC&kTgE@zh757DYoNsA!}dXC|Hv-e91ZvpK;#rd^Sz>g*U z+biob9m=<&K}&yS05uvro*JoxPMpuHtr1^Xl(5H>$_(Ty9lMRsj-{jdeh%@TW%&tS z<5d}$B-pWjA;$ww$>BZR9EerzVrdlWl$d{K#>f~xd#-VHs;%*R+Aq4>qowUj^zR4G zeQsSSDzfejQHhRV&A%qSp}piw$kd#pU&N_ueodBTrtM2dnF42x81z)}lloTlagNEq z9L~eEh0DOt$x}{@?xv%HKq=1WLt4kzPc7;@z5xBW6$!Z1TffPBA+13DoV`a_mxIDe zRd)6c;=;Y{Yh6xR5as+%KVxNc2wuzIw)6JReIi>;@YA(HYg4b46c=OOSAw#1FX%B--%(kvKsqJ0 z?&1E-!~Ls!>lrel%>EDVpK;7HsrHka#ikk*-@YEJo7bE6U-mz0%Z4V;U&YxAPHdYZdm|e`~sv-6~=4p6B7>R}4*k#G2Uho9H z`$XC!d8v8h@CqHKiiPZzXIz$C$o)G0g`77J=Lv@XrjT%pHN8CbJ_P6I3636*gehl| zP|zg*64Sk%$r=x$Tyar{y*y|OrI2UdS(D((#{7~J*cwqM6MqbPbk(#(a7pTz_F7%> z=$kY1D9LbUI@~o;XyEKnhIGHAFrzgUGQ@`@$k!YL64sFADX=u>H2sk7j zaK@ul8494K%Mq%}&H3FSjFJIG=CqYfpw&&SD$FWBdD!Vm>|MJy@43lV(P#Wlrg4uh z$qe(x-H{IAt{4U;f?9`D0k(1;MB45;#ukSNJ_^qoeiYZD zvB1E8yZsXW+b4bJj6q@}rxuR-G&UT~MtdvnYAFSz^@>fUC#8R8s*p)pXFq$6L?RGc zrva+LmHk*1`Edg-EIm`xToYzIV(fEzQJ&=q;XAPec+8ju9IyNuF$DN_hXFne&=p== zu0=O__*w)(2)OQ?N>4I?NA9eTz>UBTC^kh>^MBq`3h#M8|6~~X@c|M8(CG>oSVL0! z!IA0J2Q(>reNiz%hjxf1)B`4-nRkl>LD8v60ACQf`CX2N}jUES(gvl zX7byiFxew=&IuKOd}N>Er$b=cVJe|U0$Ks1w_xrlEgT{GTC$8}U?qppmi@?M<58k7 zKGm5#!VvUuBSxX?*=LOcfEM2ta`V7M>hRg^>#Xqx^EykejxORVVZ&vCYCrc{GSAOL zKexveoXNf5a(Qovu6z4qvXO;SCmtXRZz>Bxp8Cb?mJmc;#JJ7}D-y?wwML*$*_{Un zDsvXEa*wi(m-8b4w1;~)@DEmnoTzj(Fh;o9@T5-H4?ci7&Wm2&e}+`DqD;XYM?=i>ofSZ}Q zJIsK5VgNj<5WwRXf`Gi>)w6$I2lg@-2lBDK(1b4Ks*U+ltMQ+6TIUMuR0FvHBuUAG zppU?~NQBQkbEh}7Ko|o$NUwfvE;Fa45!#$IV?VT|FjiSCis=|0ysa78P)BuKo&_l* zpk#vYYl^xySq`nsNIKK3C7k2Cb_W;}$^XZzqs_FJcU1>dwX<{S(HCGs0@D zTp$0#m{>hKKZyVsD+Xr0SvpzPok_N!x^5EV-;V758aRDxIiH#xwBaGzk2;Zo9>G`; zP5dEBg!*=A!V1dPG5T7zMQ1{vYpOcu4CQ8Gh{rC&uUz4g`1p|>8#B?GejEASa4cr! 
zZ;C`3``-!YIb=)q$dgB*Z0+K8D+gA^pz=5rpSz;QQXflOpyQSUwejeaiQ7{|mPZA9v-w`na@W ztB0ver#J0-s*P%1h=~*?saC2~SeMsJIy1J&Eb39W8io%CC93W0sb5`ye{vF8bOpEx z(mR0JF2J4rMRvmdFXI&&{)%s=EfNT= zV4m)np)oPPzmtV33pbXFEH0ht@{bU`#;d4DsKAlC6N6FTtO;cnoL>*YBL}JRel8mX z@Z$Zvy~BZ}+v}J@;(jA8C`*ePPXW`rxMp^r9P>&X}#Q;vN(Lf?C2 z!bgdk^s=CSA^TF~FrtpTl`IHVd5R0-ke5@J;-bG_ESL{P=%12Yp@d+lWwn$HFs}h; z373V~n2vpHqbt4Gc-`h>evL|;hMNwKWgK8ism=L7288t_3UtA%*w@$bNS5RcQ;$)X z`U1i@HOYII)od-1V>x;i&R*+p)8<<)5sS9yJ444UpSm=)>ivN=Q(*E&*Mvu0_^R=p zhemPG4YC{m!D(Ig8 zdcnUZ_~#wt@i3Ks+ef?qWetc}%wQFYga$7EmsiTxiNaKJdx^IHSmFP<5On~|Ti?uo z`d?m&#oEDCjIt?K4`HY5CbG12)^drU`e)Zf39&+(Z{`i zzO*u86YxIVL{`Linsj}>|627PZivQu60z?HRzxrDQT(fsc|Yaiub-(a^gxo-JlHTj zNbdu_J+O&XMIkq^G5~G8-$7g9vA0lpqqn#D@Zf#L&(}dc0G&`Vy}tncgGqa`sgA!M|k!Wl*CboUGN9$T9be-j#{*I)0JQoU{C^6+iV#y9V zplfV=pt|-J;BmHLhx6>qkV4Tx+EQh_bb2=T<(0p|d9LInN>X9fH@pqZ_J2Pm?bwB{^S|>%$az^YU}-wc*I=vwe9yi>CVhq#9vpL$_|Ok-o@YmHV2L#CUA%^qYjctw_(*r%gXv1vYZ=TW&Jh zmb~)6X*pvKc@+2e{O>!`q@YU5bhLa`u9aG^ugqJ5h5n^!_Hu$>dp`c(2Xp0ytouvX zLIgBn38nDu!>l~I4K-y1a+r}dzKkc7)9huZ@ltQgeg~Q_*?pu>c_IF_X?keq0S{97MEq?BSlJ zeI~iAa>ciIC`?DR_|CuO^1V{@)8r|^cC^#clfvwqK*5^4zg#u&1Nv*~sc*KD3OIL& z&wPj$oV$5ZWi%SS#Ysu>ycoW)cOYpDhUgbj^P}lQ8cE<=5Z0Dc1uH*%$MmaW3@tNPc&)J0sJSNV!mi zf1NTVd|uJ<8ktJRVgC&x3#EPA5-(^7;*Q5he0TO$vH01uLF>Oj={$S2)iR`|LKNpx z9fZ`L#kwv-H6}mTh9w_ItR0~|WZvdhV%IGq>WuG)VH3nn{mj2AwAvCDUW7u!{Nwvj z=spb^-ci=~p@YL^!Rs5=lH{DylW8+4`e}@cFRn+p>R-io3)fwB7|g_!s(ILKTGf_Y z-M5c194u3+g?XPV3-M(nMVUuOS675@O}P*xK-qlG_n3YgxvFQu6Gs8?lEm13d5MoG z3(2ETP4AYmguYDSVrnM!P?Y`j^0aS3+Q;MLQ21ER5{HSLOqw=;gr=aP)?$c?KskF= zE~fP&G^Q9(`BqO)95AR-f&*Au*Uf=@2(-)XOwSKTR}@0e0@UJW?SZ%3u|0vZ6$Vf9 zkI!gs#-~*jx-qsSuRjVlwEfGQ%GLNVTtBqQe@A18vmGjjSLvFt{xh6|8*5z~ zqv|mq_i^c|&I?r1(Kr!bO8uegGIp|izC_X|FFA%_&R8N1w*5LO*`FFNuqJeAwJ&XR zYYtZSKCyZwFS*}L439+S6KL#gz6L=Tjy=u~z3)FxIr=f`shVD&<( zi3640Et($`DCAnlm`YjLTt;16dH~(z9&KLBYPz+3O@dR-xYDSQf0?PL=&$KFs&Sg+ zx78~`zM0yV|AuQ29xy-{J=EKX( zN6i5v@+w^kH_HwSA+c5vxrd!|wuF3B1cHH>@D7sZj=#2C2u(@j&UVwO9KfqmURb+(wYfJn=u4YRpMleR~+ zs{%Dn(aQL4IMI(iHF@hqk>86y(pIW3IDdA@Tu!*vKm9k84k`%1WlXjWg5m+V4eG}Ar1vzbj!P&zwVJtMRes>L zQ_hTie|yu|WpF#a^5j-uz_2Z}Q=puRRXzZ7?qTaf^0Z1ob$rT|OeQx@KPu?X%jgZiM*QvkS&vhO}cYW`uwJ)CD z;q>7DooYNhS%9LfmE)Qvk&|^3vBUM?H{8-ZunE8hqziXyKidfTXTteGKEHFjQD|BwU7`%?m9;fJd1nk7n5u zYsQU#hKy6BEB~zinRCq@Nz#&A!m}3kP}@xGg36=xBv${{kJ#0fJ8an z5jJ*>vk&HastlfzUY_Lm!n`nQf^;8syfK<-?I%>IPCu~)F6C5uL|veMINFU@FSi|N zHT(`ow9A_iQ>JdeApyO(+iNPVKF9mb3xEQN;DL)D;5kZq?W0Urcr7EIo?cM!d*U*p zn4)IX&Q`B8bN~`P%`MMMtezfu3yn0dIG=GCd5%g_eRFGO59#a__w9k&w8h(E#;Qn^ zaRId0NLr6AYU8h_N^NY+CLUKImJ#fXK#I# zyfPq02ysaVE$kKAuiE7;iU=ENd8jG0@zm2+KBo)~Gf{I%`CxPaSi)yU2w9$PmM&YD z25t0^kQ&wp91SbxY7%m%a-Ln}XnJiKDN3lk~VRx13G*`#K@)x-$|~jX9rw z^;OFYeMFtb{eATJya~&}OeO2SHw>$L-4t zVckNmS7bjBrOATNNx~DxhWw;Yk?*Pejcos`{aP;Hy>hU|Ykn9P19!R@X|FFAIfwL2 z+%4}ocB*qVntf*TjlHeWpl;aWvOePR)tRI0Oi9N9=cApLAq%xn;^#9rs3fElysb9#1NN&GP!>#?AFZX7Zi0xc3AsA?w%r8 zk1CN~PD=zf&=e2?IoH>C#Kh;D9j|+ajjI`vogqH2xGd>TXlO(xK6{d5rd6_k!tX@k zHBQ1nf$&ib%_Ke2Qc)&hZ|WV#mW4^5Lq^OF0Hs!8C9x|;T$ekV%6J7OWi}z9_>;4();JmCN zSS@mPdD=_AZSxCgh*_$W_9>a*+;OFooqS!+y%%+t&O^$k8HytgJ>D+PFIPy?4$j>A z4{$=ymg0X?xw9<${)+Z&3r`QQ*HRF&2S8z_n>%gQpj#HnbTOxN*hwWF|Cuc$ueo<> zoN9a=lDdj8+047+goluRc1|hJ&aM?_q|EmELhT%!c&%R}&RM|n%(Cmx&4Rk4&+W9E ze-b<}m^1RQtxhWbokscFA^#09rRamTjOXa|{s)Hm7asF}^Q(?VvZHZttT8zfF~BU$ z`8E75IncY$5H)pJiH5vw6%rm(o-4$EMo?TQ+HaJ)el!i?7V3Dyk&p*jwvhxSrYwQm z!Ph?2d`dAwGP#EXRrH1ZM6J!H@MqY4XyJ4JPkYxM4rTiH@pB4cvn4d>AVN#olpM=i zhpeoWw2%>wf=Z6*YzCl=eg&;zxU_+`Fy?;5aD!B(wVR%n*>u4|GBWLQ2uF(eP8LvF+mUu;MJcuK5a4Z}p>kwU!_iX!mt7Jhb_Gb|kaT6{>i-Xv 
zyvQ2qr#-HSjwF7Lg>dXDVCh|=MEcsi{MW|Yp^5u5MM!j{M;;53&j2{8`s>t?zm1(2 zErt?hU2Orh%sNx;f6}$W1}I&Kl0EY0AD}(!bZweR^mWTKd2jwZ`I0vWBviQMVN{U78C1 zIlF04*E@0&gb-c8uj>LUuF7yG&bRu_uL|a*(I}hNS0L$0P>e-*Ib8c2re5`1_Uj)n zsMkHzly$xshg6WQ^;}3!$#k{==3D%b*T^bWS{^s|PuQrdHs!p5D8Cao=n9rNGVMCL zw;md;z6&`3E>0@vJ(DrvwrKg$1=w*`q>+g9MHl6nG243vGR;WjBPcB@7>p8XD9E!7 zi36{cF|O8`huY1d3YV9DW$+jFBPenThzD_3r@x&0aQvmF~@GFayNCS75tzX6sNSIJKJo= zK|OUoXh5eeS6;BoMsGBoI3@*nufbF7R~+o%&cG!yVP;?PZ`deUX?}-utG(#)VhX+k zN9YK{&q!r!F=2GnWenQ$ED!MD;i$r~toF546IpP>^M}E&fV*_=m$7uhdB&TN-d}W; zjg0~EA@Ul36@f=L5!`yTEXMysdfqmGUru;8ZRE6cb5*QF$^kmKgWiHpWvoKE(Yk}? zv-(D9^;=ZN(~37^2VSo(hZf@{G+Z8F9koB7zxPz}M7?Sz9=xyOIiVL0%>i;R(pO(Y zu7~w=Jg;Hznfq$p2a%7&NSe8D-`fiV?&Br>$YOKjd`r}*{CpMoRow=0!4(6O zEWv{YrFzBNUKomH%q(spyQN%xwAAoOy!@A0P}f*ci_(7rcF1rLrsPEZ(>(aHdGQ=X z(q(r8nN<8h^t%N%r{jicSN;mOm`%x=n|P_Zm2nAv@FQ}K0ABk;t2a)Xwt^C7nr+Y{ zJ-D*GR0f`odzQ$-)|2*0dnw3(;*yqq)p%1L(c^yA8XwkE=(5+}#$DSB{FwV<1Un3{ zj`c7F^oE<<=$DUgXeCneXQ0fEZ zCJ%fINs{k%-Yi%Nj?L||mY?17=Xo`nPgub|YNn*;XpzI2JF=KBB{*89PNXe{FU*+I z-pdu#e$aJuh{rBiEK7fOsi#pXEOvjt7A+OZPmfYLT*Af>1R-R-1_vSWRCYXlWxR44KdM$wOJ+iLq;I+n`AiP4_}w+%G|?g-|Hme5iqp)a)fEr`==T&(sFZAu zlFD^9eyj3WlVe*!Ae(8CNXq$oU!0pOuMs*oSq(9)+;C$hfc;PUEXdZ1kS}mky+!#~ z+Wu`0t!Ez*YA^|m()yp<`lIu{8oeeZ3wQ+MhX(XDZCQ+b*_*r(-&$)fgL%ur%M5NVD z6xg%$1tNTo5KJ0}{)8pc zFyUpWiRJm~u2V-2<05jXI^Qs5R9Pt$*?~UI$cUWjmyORm0etaQl6(N@R?GoNXSzG_ z?xLx2uZ36mc(!K(?_PHMtWB*Q9UOz5!hYVMI-){~7|M>CT`|OH()C^2rC@5LGGx32 zYfb|f_U#JqkrF{4$IHWAcOyc5^^{&P+>PhlWEQQAi?_EOdlmOUDUwK|g?XwZ# zrI$ORAn3NB*ik%Ep|6KP1xfHauG~GCK~~v}7bp7zu(UoRHVlhRdndJTu@~3hw&n6vlzFOtvc|v5 zw-06W8M3)2Uwz-)Q_*kr?4|<~ zjH9>S>UvCd7e2Xk3DaZ;j?hWHK$*;l=KVQ_-0>jj%wOo}YTA{2nZY8DXSVTR0b^tk_PFSjR zLM`NN+ek57Z_HYCR^JJgNSHO$C$6fFQhc+zXjvK2G49t+&1~1<<;aDHHQ8Ef9ot8A z^j?MBw$5~qs_OXm8mmX9+J_wQ7Lzv^kHbr2q8I6Iw~bd9+i$p(a<`cm^azBr^e4`l zr#75@CCW2bc7fGC<79ri@On47Xk)S}Qu7u$URN~E_ZH&%K(m(~J(%4Kp#m)wf6Bi4 z#n%dn`Nz(qO43LxXqja_(@2xlS2J7+sMQKIJ>DrTI53hWSmC|kt4Dufu5<^LG~2aM z1B`s3BaL=r6U-#5<4jw1N5`j2b5q6KhS#lD8gUP#j24uw%Y2(onk(Xpa)1H2b}^G1 zfwjZJT_81P-%YRUyz5?OUHWb!H2Cfu;hf!Nwbgdic|7?@yaHZRQ`C zvY&`BR2PV-=oDQvQBD*f&u!h+us)RW36RCc3#^%Ji{;`l?OeO0)2$sXMZ+cgLv;@8 zl;@TyrE`+JTBV&ZY>kd=u)%DT&^W;RGN3L=vU_ts0jO(N#+vJhzS*IsNzpL;edoN-_JI<0#5>a&QzD*plx|(Sf2n-riV*)G*hsWM0DXzBy9i5~ zRKfAM;??in|5_vX<;F@-(mgUzVJZ--rw}dGt@gfUO}U>gw)Rst^=yb>Rp|D3lkAFN zt~^~L5tFgK7-;quNARy(Iv7P}_|=voaq%shqDeu4_kP)O8<^mz2*sTe2Y1GtORp9! z!KZ|>V9~DkCao%yatS1LlQ*xsiX}0vqFS~6h%O3+(JH&upKip)q03+pF zDN9LavTEZy@3a=<5tn3R$a)`pc&ze!r(m)7N0tKp>UzEUO_TNW;0m=nm1$>v7O?$= zgXc>SDpYGa{oYZ6kN*=Su+CFMV2a~__$80cd>-^@R8sAbI9cdvH{yq)#|IWX8(8p& z#{IE1?^XS8As*yZsFSXhBXR4}M-cZ7obkI$H&TdyJY)U8SQ*%vTOz!F{qpNO)($qI z!qO;GcK&Y>(qFG-=sQTpsosvW{$CeG=SpjVhK`xo_}{L#{sf26)PBWZS>}^_KlQmj z-opklr3>Yo1wYXkK7oIly7r3y`KllK%l};mX2;B^m^GArW)Q~*f0kx8UsW7+kN+<( CS_j_RG zotbsNwZ3(K+@H7C(K=PTPVL&Y>)B784pCN=K}R7$fq{WRmwog49SjT{6u7QIdJY^L zhb$>!V9>-YB_)+*B_*kp9l_?7HfAs|Z$jcU5w%qN2-0< z+7PoCPOHDbfZ1WFH*ugSf(5k{DC1BPbP}Z{cT>5*<3V8Y>tHn5%0Yvn5fO0T(Q5qI z5AEPyPZ>DHTHaMXv=S(^eDgycbCJ(o_ zA>LocscV9GO}?024~A1ZSKj9(apv@`o1+_d>8(k<(9)tBURAsk8$WK9_f2;m4dyAC zu#t6dvKi(8*E%vt_jBN{7^p})qjly*X{4uxiA$9hZWam2!N>QQgu8B;T0oA`VoL_v|jiRDdsfx8;lGTQJKIVE>(#OZ}& z>nlUdoO;=px%ay^urh^_h4>Z>d7!rezboG)lA%|SbSgfpM~;$9iRzl!+Y%=Y;Wi*> zebIRgr=kUspk(Qf{nX8}FMopid^5~;n7}bB?h<6|YEnldriB{G5=lAytdRz*BKo-t z!n5q`=bJwX5rP>vzPg~kmPM9J!(93K&Am%2m1a0heRg%@V-p847^A{VT;-$V%XB>? 
z!W;?85j?_fdghhRFsC}~ict3>Gb)tt;v7aWOeMrdG`+AtYZk8!-1E1PpXpJ-YJ*_N z;s%$$;CVzM8Q_9AFL7yL8Hq>;=??MzX?p$0P4G2kAWVm>(UHB+l&FUhqo^q(k%30)gUv5@g z-0_;8|J(`XayV@F@UCSLeo>7}fo+E+h+NBnv?=Es>WH`X={mDt@BM8e>O6^+=dySm zrCM-*9h)5znS=;sN3#YD4Q+(@9{0SmE@WX%Eg7tr!ul1lhVxr(*S$aCJj^ zujcf(T6YiOdS#hQOus-SjrcyeO_EKDP4>_hiFUjhA(HP5q4{#3v}c0Q2ySVRh{&)< zEcrVoXC}#EW-g?TuRU=Y9_*+ZN|)+A{9u-+xj8uOPlX77A+-6j*WE>WZ`vdsjc1~? zUC{~`=DcbDQ;7=O@?zqTA{PvO8{F@oS9Neo%cyYTNMB$$sjzcC9&=pV)D6EddXm|q`nTaG5SEMF{xUTU+v9`qx zXfu@2a{77_1KeKz2lH6;mNY<`gN7f=8;R`a2MGe>SDBxb6@M%8zvYL&ct`k$JnGHF zi&?DZWrMdbHn8%s332(bE^w4^=0YMv?&xs({MSHG+Ac_4j>J1;OL9xT!Vu@U390LE zSw$vAD@8a((RnoQl8WDzRm!h_>Q|4mV9w|xdCyXbDe!raLFoni3otETGPSHBuQ=g!rau%`RpbYb#)%iGmQvU{GwqE{o)3tv!)%%$w)QUB9S-+a{0J- z3CeEDO7Fm`OQRpSt#znaBQ)OVN)+d%<%xXrQO+(FD0G;(w5pn5p4gcvD{59_DAp~k zn&>GeEKVqoK4oNW&tq<-_JQoz*67xBj&;L_?hhN*Y*X{O9^zHHVL4`{ z_*q@k$>l%PcdMsN1*L1hRc#Aj3t#hI6MH7`s?15JWT$28=QYbTDl{s|$oJ?oXj_CT z1`Ru;DeAwS)=9`t$!}D<@IO#kQ1a~dd?^yJNYkv-T>nPm#kYnGtrNg~LE|{FiwFil(k}s4m*}QtPV6tH8 zcPIa#mQB;}Wwk?jNSW+6*{^-fF}U<1dCsbSs+rr&sjDm0{g7ecSS^>!W3qWkrA1OxDQQgk$Ww^8m?!kj8tKJoTDMl42)~ z!&2K4)e=IjX-4`V}(!B70T)EPL!i z-!RfKB!bZ~_B)U}nuEhWYoZ_8=LlAkg^^j4Q3`GgN(l_P6uEf{{B(Rh`(>SWIO1|+ z?@#Jx)6i8}SiJnF0lft2gz2*LGT~{I>0LYSTIE`8J9NA9dGh&Z2c!#Nk5Lh6kC#Uu z&M$Yi*O`t>&i0NgPvtI(PIru~xJ;A6k~W)>7sVH=nj$^?PLEHSP6d`v63Vl|Wru7_ z2T{DV&uD8Sy!R{)dd}unrng6qlrEO`mWYiA+RzJ7-0?nO_hQadT!x$V^c(M+H={XX zL?e)6c#fPKj~Hd3b${&mC?7Z{Q7sWIF%>L~#Dh76Qi{BfdV-Wf;QA^Ri-NdCxGB(d z@Q*{0fDg6-HW!T!O<@#r)GjTh)QpU57Kuz|HjQ*CnG5?YvksmSG>U9!Pherp7QG8r+de-$7-aKazd^6%$S>8e>$BoR=yCJk+7JqTk?%~4 z@un|LpI``ywP6RLbmMd5S#?leX%v>85^i_UJYKs8mt4{5fE>r8NNs^Pc$4o+3Qx~D1 z%azN1IlTOi}$};99xp|%Hv+bHS+f857dyVD=d#{=Ea-r8kf^OZ1HihR!Ws7Fj)|Tixw58iK zBCZ#4BJExzR^-;Tdy@;gHJE!b2VN~=J`Y3iQHaTyuP~=5Dh1U%5`F8cF$z<3rZqG4 zmu4WdqrP-2Z=ABhtADZ;W!%WPgftxVHZR9;V{uQE(75sW&NA>a_C(@bb{tMVZ8YY~ zWV9^eOpm$JLkF*_GA)!cOw+6d58dv6Kg&RsVSB|sWe9Cjo8I~5dFd9pgjwrRGpT>x zFmK;=BWHTkdycCKUzXlL;Jv@+GJGb_mj$j~dfVVpY-%I3$$DRUFy~QgxWLqKa&@rh z^~o#ug6AMXjKfp;EO>#^OGrd0QVh-a_ci06Ukg1=J?!at!dGKF4@~=(S6t&3lM7`F zJ)RO?Wx|vvO}jytU7L&1VmuEe-j+AeQNsYgALkJVyPhvRc#b@eNdx(W78_b^4N(pv+(y*#j_ZuT)D4*_DbFD$mxdtzSjHmro;*y z5K)AhX~~)^D8MiS*GMo3up}_gfGb#F7lkGL=UN(;5eEJ#9VioQ34`!=9!22%_=5oZ zW0}9s@X?=P5P>@!V0Zrp_dmJepx@yC=NhI4cm^Y`Dk&=qoK;O7&CKkaK7gHzOD*<+ z1Z4X++D zF_pWmjh&OAy9muw4ng4hF`1o)>M4t}wFr%tf-;pP*wKuNmyLssgGLmEii%3u@x8g= zyVufx7YFV{Xg)YQ+Y7R@yScfsxpA?99WB^71q1}xIbN~9dc_LlV0H4ab2fHowR58V zOUVDod2Qxo>S$^2YzelbdX#Hy0(Nm0p`m%K=%3$T?`h_4`EN~jPJcHGXdwIJ6LwBE z4)%X!14V@&Qw5bR-OX&YUt8J&Is@tu<>Y@Q{FMLy^W@(e|5H-y-;#Vh-2W~4pC|u& zNp&YPM@g_PP^GizzuonB;r~ATyPz=pW6S@eiNDnRlnUrs6h)Z*pFR^sX}{3K1-g;U z^0ks0aDFt*$Db~+GyZ)BuJQchKwX%?Szh+FxSBic-h%cQGAW|AQh&FidhAu1}z(p_H=)V!5x5O`=pErdtW>7~W>tDfUx zehIb1aS^G^O9Mw+Sz9%D=8xUEF@)rkZ7A?cK?5hVf8wp{ZlUz71lSH0B`>^={W3v-VlolvPtys9f%493wH%0 z6v0gUKd*~K1)>_^{P)fu@xSm;Rq-VuAcY>8AhQ!34lOLt2C1<1PySyUma9 z6d>I5pF22M6s%sv-$|fuhFQA`9<(z0JTQC|J~T zcdqNwOC4M=t|X?v-}-R>qox!&NPT_oQ%&5vV2z_$xACv!LRn(>m)ZHQK3D4rzhoKG z=&-$S4`;2*_nBoVh)Y}}m*v#0{LO*hKgho<+v*5fqk1$Qn7eec#2 z3_o(k6_5Jx^pS!DH`?Ga!*w)m@@?iMF-0aO`w|R&uVS~OUA1a#v^5Pj#Br`km1p-l zG_M^#d|><)4-(ye^Pc>%)gTR6%GDJ8o*qg+VTY3Oiq0q*3QGoL^O@lCUHGW;YD45T zQhR=pqc-^Cl;!%(^pr~JC7%m@=YFRAYIwheOb@s#Owmg!Hce<$@hvJLTX;FyZd0wz z%qz3B-w@!`NH+ ztri%T*mUiVlejDtn{1C)xK2bXzm|_ztH!G9DhrktCcU7`q_u~2DjCG0Zm^xs2>Lf9`=zyi)?tN-^B~l6ogn)Q&Xwr@w)03?-vH{ z+qns|wa*wbd(&_Q)3qw!%Z{-{7)}!gH0j-?i~0G^6E~>pmCK5mE1;}VdaOmAmy)`Q zLeC%q&1X}(4m0l}b7T*!$FF)ZZ6uSuZ)PlUQ7GfYNji&H4Gruz^R6NE1i%%yZyZKJp$GVeGi 
zGxmPblh$zk6Zh5mk5Z&HTkQBJzW_zzHui2jXxLpbf{w|vN2GgfByB~@vxR&WI;cBu zK87lP6??elQaWkMxBOU+`sc^ALD+(1bE)*~6wowp))yDK49^y!mIJp8H}k$X`}%iQ zh)`&Vmf#fHvf2G_)5wT)Z)o4w(+y0uHD=#2=d(%89*^c*=zwe^w`&^Mkn}TXk-+%L zEl@>*aJy?yhK1O_V(ukMvdb$!oJ|{UT4qgiD;&AN_C|jBQNBx4!eSdUbs?I%N4}1FVU5^-d`85)+9%Y-Sb=4QcYp^9`4hhgf`vLH{2XFs`~ZWJcmM2w{!ZM zXv7xauS!~K7y_DDe?B>IkV?QSxFO-y6k)hIMYlh>nSI03e-Z5WM z>$xOSg1vPksv|(YV9Z81kE?#`qP|;rj50%!l#AYhNK?tQeqY-26>DAW-Z`3=PtF(* z|A?nWcC;)|&ZBufURN;L5irs2xpfa{rXnD(ti~*}R&1~1AG;tbaM<@JH?6GJHht;(aCgxoo-6$M zo!t!w!wL2@j{GOVE9j5rHDiVi#C&v8-rtP~Ki*Qc);11;3=SlPc1x-R1$E#~fk7iu z;WWgTL`JsceWK9gYdu{SaS~0rsHmvuYJ+K+8O~>-VK^83Pv3qk@Gn0e^1fZ0vB+4M zMDuu=g&{0?KgZPKZ97V6=*@YKb`=<9r@8bD>Nf8nGcOK(!{8MyR%t)2?7a(})*H@AIS1MwqfML%1& z&cf|YAsKwqXDs!ywRa47+a;W;9EuozO1GNUN$1(PtLxY77h~viHA@fuptN7_y#3D0 z$$r3xu(w?22lD~dcEO)Nf3~_w!QpG1k~m*>hsWpEcCJQ8m3urr#(U`^6{2r`4RXI? z*PPtBGJI;ba)#O5g->Iv(gM4KKDl5ovGcpBJ~ zD3?S_&S8Vg@4ETw-@PP!X6DV+(}>Tau|?P3e0@5p1r_!kcUtX^@KpU?4!lxiWTe}( z5%c_{6Q~RUh4)eWSH-w}K}dpu$Y!cjL(Q)6p8ql62;B}?EYp3?XN)lM^c^T$STxJt zx=h|>8m>nQ|K-J~AX05TmwCGkn(O;L2D_XJt^MP+5-mK_`Q@(ndsQPV?h?L{qnN5d zZ;UY+dLK)1TMVbxRh_1QTKn_o`Ifw^7W|-y1UX!1ydFI89#;sKel+pxq;_?g#SiN! zE#3;It;A*zz;xMS{h+KlvCpEp=H)Y7=6!U%U%Moco4febdWvT|6ib77{iUtY{VL+f zt*fpuLKz{q18z4=$Hn|||Gq%^wG=N0^!-Su20?ESZ#Bt;TMW1Lc%H(}$%vSykn5gu zFr{yu)w8`Km*TKwPE+aJ{kTdQ85#9m+#11^FSN7k7-(o}I_3*WH>ahIrLn<}b2Wz# zf_EfXx5h>#g%7Mzy0lXW%)Dyo7#Kx?n4)!H!M56R(7|GJBUI~q8OH7$sD%yGFa9ss zX$yu4Xu@>qfBkRLw?~{M4vPK{hFX^N2Q3B*n$i3Vq?7@`)A`59tC)}9pf*WnTXZ5Nq}N&iX010W_51S5Z# zeUtaq@KbSlp=Xw7_Ghx=@K*rbqHk!Njd7*1&z0qZ*p~H823y`O%VkGa@7az7fi381`!}Q4%Uow&mJ9nd%zMp1!I+sV??;)$-zp{7xE!n5MwS zwZWX}K|(&8?Eb5hx5)|I=K5Yq2@O~2O$bmA%mV{uvkYO?7)(4L5SxAjHwh8Zmf+~= zd5-Ial}5L(<=LB}l#&6jmvvBi-HiOm7p-nfK74`?wqRD|Sz)m}$hEq9eA&qC4{P<^2z=GIZ{HSFRB-Z$ic*xz7Y7f^oO|pP=1%}|#zNGv z9;BukL(WNkGR@c&J>4MXWY`wo-Ll*3r?SqR*+Z zUpv)XizXlcQsrXaDRQs|tomW($dDE8dbei^HgB(d(y9<9=&wLkIRIu=SD>I+qx2o2 zzBBf*HtcX;pRCJD2t&~`R01^$YY*hLF6kx8>FZ8S|Ii!{gieYJVaJ>&hy0=Pv1~IJ z5b}K$z20(sm6BKAbiGq-_2EOoolp<6DH}N8mU~|_IpQA|#et5nF_e<;dN4n8JQ`h@ zqCjO>!+7+dmGD5yOYq0}O_8b2s+`QyDJ!PjIDVg(F4azJ^1t}2 z8s#Y9tsS)Hz877FgIe4TJ%+^Z>*C2?dPXY^mC875OBt}>U^(4jri2J-vNFRl=stS% z038IPrGDcw7ExLVdwy%v`n&0XmC^uoHg_YVZ`rO0P;#=DPYpm|ZoAZITHOg9L0SvD zh?^$c5mavCt;`H^%Ci2@qs;mwOad+(9CSGwvinc2Q_8l{xaBDlR+YPApRD4Y``R5{ z3FNP4|L&w6<(IydW9Fg#iyh#z>HuABa^ zBWuUItE;PBTs4jb!jNetHDY37l`4x7JHn9|krD&O?;j``&8f=wUC!&QLmUkRe#Nc6 zVS5yy0oz?$Vls1X;XSYsS6xer z6!stcxz5sX186-0^vs2eq2aoO>5(|%c*g&bOtL-#h?@966jgF|*|(BJzovYpYQ1N; zUjwJ`!mmHQkPP-}z2$1x=Qz&q)p79&ucX;SUxlq=UUPlA$5WR(J_GFMtlG)cyGp8( zJ-^Fh#>pw43gR}0&>DoRf298g{;3mj05ii;*5Y9WK*3GjoWQR$hs`l=K!OnoDo2aL z>Tv!xIe-Mn$?kr%g_N~k$4ot0-5$1)_v#yv(V>_4K}tDJpx0XQgu+M)u6_fi?i0BA ziD%@ETD4T*as>w%z*04*fMb^RTTSi;nwXdvtM~6;+X7Bfv!5unH%E$dFS>JP|Ry%&ZV-K;CG2~{~(u@Vy# zmGjeFlV`sh2bx9>PvR6jk`-wz+M&-n9i~dOk1Nx)>+CaiDolb5*6?8wPypYDk3i>P z3%5H}s`_YaVk+N5^Lg(PVPGB}rrOdoVPL4L9%;q>G{ixCDZjzzZyM445kKOERcG0k?$Gu{7C7dQAHN`(z~0=SlnoE>VT#|G_7d{z|ruOl{>iXIrb^PQqk>Jz8%{Y1G)UhsIk09Y;PQ%)u z2?)jIC|7D~Rc{j4t-AzxkB*BzhHaZP)^qN_hFxGmw6f}0!|MJ+pb%Uju=V~`Q`p0^`X zenIE`O@-A1j{^vz3fQ1J0z%SISor5imc3z*f!&3~FUSZv)3a!h3sN|Rh)yaH4ppFd z1A%84;5fxJW?dB~@|7q#U3Mmv&j5nP?N^~{zOwfi(p=!r9NCyo4PDzfHN5c{7-R?x zaxBRIvo|uN@FT)(T-!=lSXXwWD07}t1{+`+ktENzD^=z+g@6D7x7qXTUGTgP(sJ?Y z$7iT;7S|Vt?2m*LNN+aKZlNKh-f8oBrQgkdt;vr`4b;wKe}p%{_Duk36axP0YQ*&B zn=R5yy*lNR>M^;1>$2}ZQQ(fr1)Y&PPreDm0TFM+)?%|4=uc5`88e{zMi3e1V|jms zFo9UjTli(HXmNgzQKk^_z#{L&a04oyGd_K$a?bIJ01M6bJ z&5-IVA{_`0*oGvH07I2fbYH&d?NAauR8Ec*79F@aSP(jd);0?h>0gd_JR{WmM(Nl} 
z1xIgj3kti8B2e?r|IJd~^e)w*n>gTv(7-*D|FG!<&OsYvnd^Q#KO?lf<+}f&V{QXq zQ%~h|>zCx|{@gSrQS90N1AFT&4PZ&EOSW48Er$h+0Rnr4?U}0l-|u4-I?YpTH#Gko zwmuNla6ZVh{!lAnz6U6osO@+?$G*3QuatRXv2WR%PsjY(swV*!74OS9X1taOazHEh zpA5fyY@!_0T9)}Q03){=rtLJuDYt$4bu?EJOgYu#?- z<$!w;({8i}Fi^k*pwrF|1L`n8>Q0yGuYL)Q+W_p20FdgTK-iN0#~OXs3P4qaj@?YZ zfZo)qO#FfX`lfyeHmk9-JSR3UH7;O-RXm)jtBE1})TS0#`s!{5LM;h*CLzMPZcOo~ zYDDxzI-KZ_tY#VnBW<%`w`{kM*iqQ>h(UwJki;D-o>PV)0GbfGT8p6_s5or7@f>4B zixVfYT;!dz!A;Vf+yvNS^z}$whaPe__vLSEISVGqa#uVrzRHOW0deUTI;H4*%x5G0 z1*mZ^si1Sgh$lewovf7zR-CP+9pz{ELeJ;GtquEMTY_DuJFLr_H!|7Ihywe75V_0( zeQyAfLP^x-N;PmQCaAI>aRzixhv?`X#(?n>>US@admebt-cA|)hd}V0Ld%XdzcXU zr%)XsWo|!_8QzaIZl-%psjB|=AnJC|D&TfsmIN)8To@HXjl!0Ky{#z=5U&x?%tZ9( zeM+7gd1;|Ba$p#>F)&pXzq}mcfcQuF&sgZJ-5f7VoahQ9ELOp4a(|Mete@bd{{5cOrtJ8D#&gZ*@PXl#Ihax3!{dIL3Fi zV)s*Rt>w=oi*#b$;L>^Xd40D9eC_Z$JBd8Kli$wTA!^C1vUN#x+wk_q@BRSUWIL}{ z!q_#mn}JzONW~x~!8hEO41W~!#9OPY1M#uPw|sDKxYzV^=8vD$w_vLr!UgoUTpMFO zq|_bg=|zA*(`T-HT+zshf;W{bBiQW|nDS{|@dRhuvN3bmWzFj?4?F#mBJ{u%KJs_F zJDc_b%+lw5(r6w>%gx#DCb?rT^&+q?VOe^kZIPya!t=QzQmjgBnMm~95^iw?U?mrQ z7xXHP>zEhHScxKux#%mAE?10Lgw`-AgntVicjEdGe_*J0Ph>z;)ESrjoGQ-nPBV%j zlhpTs&47_|_UsGo*P~^W?2lxLJLFD-oKa^V-}Zhc4R%U37rE5*qv`gFyrOq zst)B)w2P{ezjimsscJC|K7=~VzNA)Y|ewI!t<^o z5|L`*H_u6X@RE7#Fo0m*&O%H{*@cOuf&h2G-wFHDuH|Y&nO9&S+KGds#pq8H>vHj! z0^h>>NqBv>3>V?IDYiLZDZP)x^CLyBp~k`F=0hpxunQXO-9IQ`CtD|K@DIRdwb33* zkliZ0cb~@4u8voG2;T*`5?Z;&W=Tuc3R&phO+Hh!E%wa}jLO`}0>@nclK48*+o3Co zHdaj7@^J4}9QEne6Qb&+JrKrM)_QlIlFnUy=&0Wg51RF_ofhQV%MRCuzf5s-*mLBO zD;nY5MP3ATEO+h+vx($^uh9ACA!;th+sEd!-Ge-+(bjD&6d#U`ES?kX@?Jg0dPx(W z;lc2__ZLfkVzCrlbHB&3B6jLmK}0RgGf<$qIcoiIDp;wuJudau)Dz`G(ltXtk`HP5 zQH7fo4&1wNWtY((3-L32&b2YYi^j_>(0&$*JkCy|adQ-4Lah8DLa&%g#7w^vAl_h} zcD#HE_~g%HjcC?&`)#1eli*+qRm!iAh7S>~b4arMqxc!N-y!fS7aeo91~Ik{noOPM z3+@m-1YgyJ&TJO>`VAx~3R~v+Xjn=b9AbGa`mbc#?5ml}{JPqiuNa{;Y4ORNy!=VU zwTmD&M+e&-U<;x^YomiLLpIm8F9|1{+Y{t3&%+DP9@vmae+~>fy_EGJ;{=)S_3B11 zB@JLt&Qi=GozPY_*(72$IUVByg9UWS*Rph|MK%(i#f|XN!zWcp2gt=-OgVS`oF)=h zPV_wDh@_GZW9E4iK!_sYc22@v; zGMUtd1|7AtLBr?tcfeRyL~x$3bD-|nI_khwqJs}HbpI0sA@k9|tXj%uWox~K{`TZh zp0EMd4R70bk-G=k@|r44d!_~mcAJLmI|)OC;m!LhL^u*%9LWdP=TBZLvQ_E@%2|x0 z?+-ix;!EI*|G2J@Y=yNKus!$a%wF1s)l;aq)I^zDuld>17xdY?rBtqE#v}QHi3B>6 zRFi{Hl|2e!FHh3lmOxx`suJZ_yv{rOiOP=FN6cyRdPSO8(D}m~jBqo28&!3Bhp>uL zqoN1b*z`KxI+ny+8+gLie%0;oS!GOo`}3!A<`V?>OUp?2?)H`SE{W?ZJwvZdBZ`(L z3RNZdlr?y$Ce7(qy(eBOWOzeE)bmUrF2#C1YUDO|-akJf?>Os%6{fh?PWCa@QquA* zGfJz51!sbE=$t{RHs61KY|=szxUf@bY0^d!V9a$LOtV2)Dm|XK`wZy zd#2=CG%JALW9tbQ_#7SO{nQMiqS`VDLkCY6-0bo#$I(9(GLH6qWrsW!Sdo{w)-zkO%$w*-m>}f7Nuq)GQ}i1?q+bB)7xM+eH5^Ck3{di)j^MBXw#JuO7DDa-{nJU| zwTCVE;%>NW>sPkq@fppUuFfOQQ+{>Tt?9h?n-edCZUun01Y>bD5Gf2r{IPW!tb`31 z;dQbTiD5?i(S+Qva4qvBGl{Gm2d2jmAxEsK?MxcuH377pJ=Sh4M^<83mogOm{xtxu zgUGNm=r5dOH2+5=#DY+Y{eEP$6?-WOQh+bwyV1+^SGW5Obg$6ymH zV$I%uYM=EapKKY@M@AKGU!Z4h8`0|QfLlp*4^;?8>wUcd)Urd*ftB~mxf68M2 zY1U!p-v$$@Q$I%b2}Rrdp__AF+Qr8k6IHKpNY#tV+wI>_7CPS zu_;O5lGWes1Fv~hDRQro&X7zjyn%BrjnWL%K)*fxUBsHwWN;}QIr0$0phPeRgMp4KM4(RMH?E7NCB0=6CumHSX1Cfq|h|Wl06yT46 z1-{9ML0IfiuA~hjae%2I9pbwkz<&p7@xdFT#Aavy1T4^uh+L#XP*=BP;cg1@Ph~JW z3*iO$-}OGr`NRzg(u>lNnyx8pNZyM#^z{f3+umhgOZla9UqD_b)V?o^Y5xa;i30aB zLDx=z8vR4SE8pWTLJ16BjQVJ*Lm;mCqz79~zcDI_i9eQDPQEK-@7HPJV)^~rw%+~N z#4z|2lMo8qKU?Eiq!4g2L@;Wg-{4%R`%-{IgUn?@#bultR?kWA4u9}7Iw`&9*^c_n zes4LY@7yxqKgyLELyn=SRtT`L$bc7#bP8X8ne^Mf1O0Akt)Hd*z~rbQ6!Szw(^l$qS%wa>$dEv1n_ZsoBlDM z?6)||o7q4yK98j986%txK`Wz_eJ_<8k?gm4ULz={F(F{NrL!A&)q5N-#tcq&y8fx5 z(e)rTo2UFX8xw4Qw(MD)D$NTug3U8W-zru)n{}Di$7TKcss1E;Emy>1m4WiLa0;oi z4mmSr3yPXZK|=xSAy^%(5--LI+c1^$LNma72QI&+X;l5&FBHa=7YTaNjelTQf8sA2 
zKqg_j0ul2n!%-6dz#mW*`B}^~L3^SASCfciojCl@}ik)G=%GA-0D z`Ff=malcehdyo;UDqYkm|6b#Qh8M}LTe7%=Vjm|*J02%9G`l*c8RuT*3$Qb}>srH2 z$fJ;vln=Olt3hQUB-sH{4nz62kLoyku4h7FH#gzo;JgQr4C%?cjTF4wuF(Ue^3%`* zA~MqaLD?~*Wj61bRfLz}K><~=YrB@5b-&&tONvIab8ESQhF5U%NzfGnU+ug-z2ja5 zshQcdY&PuG-CWWbkwQvNjlzO+>!5nuM`l?`t*KjQ5mn%+n9ts1R&U3V;M|FNUb9P` zsgOV;K1@bCLnSnEw&*njiRISRqLa{JPZ^qEdu(_e&{c@!645*N!ZtY3h}>i z%QR$1W4;40E5gp)4{J@oZ>EtHBAt0Dc01Ems`-kA{g`${;UVV%HfZ-IPy8&0X6L~ z4qI)9$nf32_&DlxwsnA&rf1H^Sg)SHt~FT(e7Nopp52_O41aWuPC>aKQfQI(TD7xD zg1)Ot@O(7Dse!f;Y*00fpA!xK?06!e01nCn_ZxKvpGrS1EY1gcI4ymG;=^Rv@q6QI zvFA@LA1qF93P5u;g}kEx1n6QzRNsUGbH943)389Mz>jqp5G=e{Sh1vT`+GClCt3^# z8<2=ZhzqcI)wr-PKRFMu)?SJpXw|*J3?pEL-ZlLpJVbG{Fa7Pl8lj(0tn|o)0b3G4 z$Blf7Oag@H#KEAQ1;8U-v()Al(rWO=aedF}>lgkjAKfS(f*k4IEZz^>b_t`k`vR40 zD;^drHD?7_H!Fm?*Y4<2lCE}jf1Xq4E56N5k@jY`8GH>#H%gBHK;FgCr_i~oR$cN% z?4$~bP9MGcFsi;OsdSkq^BUPq2nsc!CXS;6`LANl^4C|1iH|(rYzWLDct*GJz>w9mn_?GT)BN%HLxoBWx;h$p%>qu)q+?Gjt+tdySP-T`tp%-$Y?GlVa^07hfz7+ zLPvuwk|hq{lg7qG9_x7j)+XY39V;e3>hy=ZL%$6sip|%cvPQ=9B|kq-yZO3Qx^6-) zw*x&t&XP-k#At*!RBV9Um1xKOCo<#EE>JF&sW@8=PP#8>EvW4Mvf8vW!grm+<@P|* zcWiG-Q8bx=4iC6RKDv)jvwRAs_iQJXkpY6?vqq(C+=;|=4tj@4oV3yH<#9`jyd?^c zl-S*AjQ<=SJYX32kJ=rcs2-Wpwjk&9xjxn~~4E=iu@=ly^u)tPj+fOBzyk6bh z3}O>y$+J;_v$x%uF3-4x4zSA?Q-xp1Pd1c1HOaO;#s+GzW4FL9aN|$d;LZO|?g#QM5MY`-5{rL+ z1_f!nPcn~HSZek)qg;uO-Y8m;knD2? z^gfBRA1OzSErM=)w5*c82)%q_@v(pvf#773j;>QaX+W#jQc{zWI;uxxrMe z*@#FPI`pa)^&}@h^3EWJw%u|?+6N!#(4)RXU#+21 z?pX=m$V;j=U9N;xMslo)8;yPwzc(^@A-_4$&KxSosT;$7&2z~GR03>x@@-A`a@n$p z@J3;*;@0wfSJ3Y9zhcY4u{%33a-u)h1_4bX9o=jE zATad^qfxG-j{JCntDrJO$V#XbQ1^Q~z);eeWS#+7PJqQJpaCQn{kD4PgpDA_xi-XL zq4w#y8TJrytYzcquSkyGFEW3*fJs`Q_s(X+U$A`!F@0_lSjJb+6xV(mnEcKbQ^nc# z71Qz4xP9}@3b4vzov?}~DJ;e=h)nxcAWPQO&OmkZk2WJzq7$^gJ&LkAU}^Z%jsQoH zv^7@?c0ieZ{mN#^wsxUrlsHa~Yi)>nOJjM<_TK1< zimU*isbSyUT#U$Qk;(&KH;3~d)UA5MNj1bInD=JW_kYs^K8)=WI&7j9blv;it)OQx zIVp+9=ID)UNa1Eh^P~dCc)-9Qyr79 z#=f)5WA>BCOaor_){2@H5ix~z(DbRZL<9IQ;Hu)ATAzi)c46rZjN!{*j@9jP@vIA8 z%Q2M=j)C)y#Wk?xn>>NfmYewe?!7Gp)C6-PZi0nn1G#^j3``G1sbP){JXb71U1t&6 zIty>i$V(lX@BYB@*US?zMT2#bo|nIR$NC#F`D!u3RUWr;)}q%iZvj#kZ`sQa{f0YH z%pug@ipg=tFMwqcI>m0}aG|>|5$u^?s7ik{`1WaPQDzLNyWkelPxcb8n&YtC$sx&` z+0E-udXK6r4?f|)Y4C{;vWRvpsz&EqQy15p;|67aYdnSVR%^-G9Ie-6c zK})F*_GW*ilghM#rs%XtM;yh)rvk@1DD`3-Mj>z10OEt zwBnM`*0S;v9}?(8l;WJMUWApaTC=#mht2U8+2!1a?1)rC6#beO$}{-!@$H+T{`T(f z@bTTuLm3ASvrd8Mw3_zoO5>EH2W9?FZ2D!wK;-6;bqz0|)?-xhgZ-wXRjoJEN1(1O zQ{}j3*Pwa#y&9#{wgiQ)eQu8kbrD&aH!t~@Fy^Xs2Ie&ReC=BP?Xd}T$1|*p7UDQX zc9t=wgNF3V3*cK(^`BqxC%y$fv3~|6wVC|*0jD)5I1O?*vl5yHo>cR>1G$JEYDq&) zq3~mg?Qe@S+}6WVWHvMV^9D3;RJQdR1}mldzHf`C8qgu_5Jf2Yj*v`D1g=^{1c=`2>Zp%#_YE!@HiR zUi8pHC=>DZ6#YQFj8gvYn3>e}GuD}zycoNVIsD~C4!I*AO9{TE2a3lOv32n{{ngA BWH$f+ literal 0 HcmV?d00001 diff --git a/docs/source/assets/kernel/q_vecs.png b/docs/source/assets/kernel/q_vecs.png new file mode 100644 index 0000000000000000000000000000000000000000..f55b3742f3c6a862883c6a966e0c2ba0dfad67f6 GIT binary patch literal 42065 zcmeEtbyQVd_bwo*Gzij2i-gjh($WIb(j^UtZUyO*mQG12K@bjzbayvMgVG%8uA}e! 
z`Xi*Jp)Q0-mJ5QV5K+H^_fw)sCLIw^;~AC9OJWpTcN`p4Um>X%Jbwy13TlqW`0e&o zlt8J+`Yvx?hGfe$eYJT%2LQZbG?gX8QB}AOuuI@sl7Fps<^q0>56;iY%Yb#;YA8)b zI$F}`_oYbG%!am&jMl(a7{2b_fNpI+prw1QxU}GPz`avit@Iu@MB2t6kBKM%AZFiEC6aSlBZm@xmUs@Is z{&Fguzb3@ty_5n)4F8qfh7Y|9m`=KT+=qmbKpk>U&<|ZCzzi5`WbeUQHfPY3fQ57v zCGJvd37X2IDtn*?&DjMsf2NIqtOKgHtUxJRKD=^_aQ;oqBQWng|A(Y-9aRG7YshiN zhYz3F&Tv%&ZfsrsLH_^X2oM59@ZB%>>iUuHvEYusEE$Kr%TVRPy4fj%syF)NSm(Vd-ihCZ?h0X1%!7|nU_b7ZfX6iP~_wh zoS|J)A(i^CuLE5|2)Mz6(-3`t=e)1+4s7YUj~HMKl<+YWRwxXs@pA$9`>47NTkySv z!5jDv`)>n@Y9UY-GSbws%HPBf3C{h72N#4dTo9dcaZc3KAsry?YRcApzCtdai;jN$ zr#u9ZL0*P<++ta1K)#^XME39Lali{)-b6k}dAPL0o;E|Ar;d9E)bg-^X(jBtPr8tVg4$7&TNuw9gGEf~ov3hc*~&y?If1D_KOXON9f zfm7;zsocHZfI|3Doq1iV4Z_x3(Ym=+87d#s<0MP~rIa~}?I-;%XBVH(pm%CIeN4}$ zzRvX8{~}pOy7Jv*rpD?AoISe9@Y?5xv*RiEwe+6C2s)*R^=lp)PP_wj8|kz}v9jy+ zI=kP~aQ5bUczmBj&jO+&A9xI3+Aec3R&crBI0(!;?Fa^Ibas|NqMnmn^^a5=X;?O2 ze!5Nh7Z-W>HXq^y+gw;uq1A_aY3D#w*VYVPpjkA*b(xPtqlyAwyKQ(H0F)I5w}2uK zo6}LvSx_Z-kCq@%j5^Za)%Y2VIo;KdchTEf$7$uE3KQ5w*$ufC-(Jdy?SbgF!sl1G zVFa{r?o8v557ZN+`UDuv+BNQ}jTV0ZtmhYtLl_Da;8pHMWHUHP{5(O%cIf*0barr1 zrx74Go1OATyHW2Rn{?z_@Mi^Eg`ZKn=x++k{OrtHJJHk!q-CylDbk_fb}hUBz}(`x zwp5|&000qgpC~}}N6I#XY6fT-jMDpO>&#T8N$zJxS85N|;Ksery5FbR(p4KlFEhnK z&)_>-fF8h;M^8tHL&aqM=IyOx*Pi`0${G(_FtIKlOEO_2@=8kLPvdAiB1__<)1h1P zbbun5ZR6VG_pX*>?T&~6G{7T;Pd&cqHATZn26Q|p>Dx?;Ez!&A!9KtOXIjLzR^41L z-p=cRqT5!lY}qocvJ`yw=JOnFvjh!zfy90J3EaXIoSxOe2^E9Ok;M-_r_)$lx>B2U z%Ycj6v6OBDOsRGA<@~{CR_9dAYKuW0IHnA$TXbK=a{i7dlRT;<$m$MF?<%WXn9&1) z{vI7Y7JM~7gVMH|$gnpkbhsmnaHzI9AB=RseRs<09RQ2(;a?n#xEa-w0;{8Qtvx>R zCPK4zJgV<$gN(n}pAyjYE7R)Slm7C?{ZzvM1G&}CXxyICHUosYQ9VBiG6BQ&BSiEno9ez$(w!Ah zPtv!>hjIV>bk;r%WH_PA6M!Hzzu8;7DYMy_Akuwp1iEbqJ`9EHZcGBO?wU?(kC6hn zuIE8!yf05Ko(9j+k}$h424o_NNWo^-*cpziD@>4VYhrb@+r zr}97b?bR((VjRvc-b}m(_3XCyT;YKU)$MtwDh%ZKyLM|vm25X`6NlLLOhs-l1`8P~ z6|7Std1ktp^eLA)GcO`Mwm%jIO4)w>)iHeYs5On^*-U=ZZ+cxEJB}#DzglLlva5>?!s;g{<-SvO)je$~%gvITkMr42P=e-_zgc=bwtaO2(lBT{dY~dD;sc53^_Ftv+KVCC zE?_(+=lpK3JN7Sd`z+4BzneLZwD+cUGPnHlD1ZJ{5LV#U{YQ-53~8^86Ae7KEG1oS zr+mPq3I^7OI_4ql(XTw>K*g`ybA2@h+i8b3tbXXt4ph3=vI*4k3^ObPkffxaC|@n_ z3k(m}nA`QqWEv<3sCsDND%$~yinw)hY!*B=xFI;W&LI0(-~?x4Xupu%rOk0c{#3Hz zqP@W5P2`nr_&HE#uA(7zJU--i(Z4f#-$kbR>bBiNW;f)dZrTgW5b~$QjhYtmQcTKU z%m}?+v9(Nqw=1t+=yrt1{xhn3`Nn@(6iU!AZCuh%eGS{zDO5<)APEvM^@#{|}bk(6`@OpyOdC6>XhsH5A z+i!0$K3*Kp=V5(DqnH4-ET|-P^N`sBV}0Ojusp^xLJ1go)~d1N#}uoeN9hM+E~13r z2iY9&m&&+K)8|V}TSKVA$Aq->9HkV~zJvs(SE|i=Y@`QyXqYoit=Ex`%*N(&+csnn z!Kt{^^k2gO2&fE^7#?yCUS|$y>N` z)C%s`(^}vDA;`xmcy2}$@;BeONDY7g(N?_ zl|dkqQBMEm%MRU{UBxJj^}v@I7NkXr)|qbd1%0ZCAdo$4kLhX#XuwF-Q%H@?b~svs z@a8MtYV016JwHH^@&dYLZS8ef z6x?!7rMN!=jk&2KOFEq$59i5uwQa2WoK|jVTO6i865cHBH(Na}DG+%xV3f2L17x=6 zmH+$~1Y%)o(mk-fW)D~kRZj51$oDSulcSmG&%;Z2`IVQh;|XxRomApme#2Ce?y95f zHt)K#f4Sb(e6g(s)wzV{>Ab*q6J(!eQlPa7nmUiTp@xZqTAret>t38BBd__JN++`j zYdaJ6sOTF}cYya0{n!Kw(|i+z`H88<89m}MZEHO$noh%DTYa2^5!(leO^fK$2iAo% znWbJkWp(9mKQM<-KwbtdFx>)&PIUh|ZSkt008vFmJza2KjYhDI`DJi#E+o7gqtFXc|<^kI(tjmXvC=1?$KJ?>ka>ble#bzTu zJfEwtIuru+obe21u8LXHI8aW5J$%gCdAG%;<+jW#n%A!B=Jh-_Z{o8xrf;tE97jvN z-IS{?%xb*$58a&l4zp776@k4LcCWQIZ2WrVVg<`m{X)uGGTgu4*_?Ko3%+)FUIycK zp}J0QtDc0B87u2mqJIF|+Pa9f_`sH-l9I)=1-2M2Fc~C{Co!`dA5$%jeNPyZaW?04 zdcA}+HU7t|p`c*$`+HYgW`0#7`nQKk(APzSc0~&@nm8&I%`j`pnVNTsyz@2XnEeIl z{MPUN035GSAd6GKetm`=O1X5LRi5&ISvj|37xkzXAR>N6!W9MimT9vo`YHOd7ktO* z!_4KA5DZvad0n}^&WX*Y2*lMF;#z%1`tZ~&>|WD5SI@~^%d?9xRMouvSk`>vLaVdU z-Nu^ZFnFz#Nlnipi|veX)199|2K9yc8do^JKktZoc-tj`-1Q*4I?oRH;DcbJ|8foF z!tc#oH)HPWMG?J?vE3Va6D8thc(XNHI5`18Q~}GFZ&Q$?IK-+5o4Bx5L~eF!epEFI 
z0c+@RRVTeCIdeguUVOx-9nGUEz}aTFIynrVaO4iCc~8)zB6hveJjSEJ(NcCl2uGy8m2J4oahlw7 zx{P@9WFxCru9sGl8*(9VpEa(A%8HkYc>rqAMw9ll z8+@kY*Ug$VQ@R^W(|S~C3&pyWH1y}Iv`DIg;$gJSsp1q)PF=rN0?m!-)+cJ`CE0X} zHGkl@4hUtd)DJY8MjumhBM21m>tCN_ImyeGc)^H3-dFet#G z#a^jST2+n3_jh=zNwZW$v_89ZS(Lts#{6#GJ+kxC#L~r zY~|_q8X3bXdx?|%ZZ`bvQs+d3Qh}cxoLbLMEl_B*lBp$W-1Q&s*m|>#52EnhS@02A z$&cS(NH+ZWh%C0{Lo&V2+xy={8_vkfPbg3tt1S-)Rnbq1IxMISPq|Ih#rQj2F|p(%4_dOJ;D z#WgfHm9&pnqOL4FK4n(2@-=Qyz++{{PRUe|t#P-iLqIRbc(>QrgaWan`30|1NZBgA zb-XK0yM~+yaf8vpS!0hn6hPx<$N8$Q@dHSQ)Qj{P+oX=L%WrI2=5SVfK0Zn=5s>ST z8z8r1TIHhSY{F#c;24;dbayjRx|NzVXS}-B{_VZ5b6w|mzMSiP_gs#*Nx=Mk&sU4=5%A7+$#T&wHCjYQJpZ`^sB$l3`!2taz%G{4NWs zYWc{L-EFANFREm}x1|1lwfs<&y)mo=w}fwHyLZ;@XdiQ2L+bCgFrb*gXSimcNA|V2 z$W3CzxNw>U0Z`^IK+A;A`;;+va-s#xaE$5BmF4mIt<2mS!4k`Z;p!2~uftRtHZ24z zos&Nf-%qaA81+}|k0>!xLFI(rISNZQ>(%vzRX(Sx&n8K#HFB4 zx%9Ct>-AM~M#YPGDo5Wr|1!W|i1AE4U&%mvoc;n2U~52*oyqJuFai7%7Z2nMlJiEc zF9ZK&eA8guF_wm^aF;7lmrDsWz>4Mojr6Z#K9_RHc@!T<>c8Zm7Bvd9M@6`X66;+S8mXaWk`PQAbh(R z$=yo8J~wO7^C)4aD7bNsC42eCFX&HFyrR&FAfQwl#2wLMHcWvv2Q$MgVu?tz9oOdL zeJ+zLcQEAka{w|MtG||FUIDpT-2s=gKgDM4{K#CO^JG}>7w!ElLblNE{GtHhgb(RnJ*De;6Zme=+GES*MNGD{uN#o>pm_p_KAPDH=g4 z`{nwE*O#1L0|M?FgyYcsbIU1QgZ4KNAymToxlgsYz8Yui`K3<+Jj?3>XHFgyd~W$k zM_r(@eD+B8H`_LpYB5WRojwh2&4D< zb7NX^)(^J*{Uk(bsXv=Sq87BIu+*4JB2&JAUb9>2onUsL`WzFNe7{li!7XdMG*XiO z+-=m&IPbl>=A`8978C`ftUPS!UKc=jnWgqVoV8-Gs?5<-SW2=Ynigji;8hUC<3gPx zXFUPP;a}B#AKt}KtcoV?_-4?b9iJ~ZFpl}iU_)MZAWdt1m#eilcS~_;dLt;wp~-2I z-m?dvk_JudXfZe=*V?0UFdJ(*p_m|*Vf@zVs(3POadbKTTS=wu(B7?nj>;N)O%=c) z+0qr_Ewm<74Vj>ufbpjz?C5lf=uFRQOYweh>Zi}9wC*7X<2fX2uT>I9m-_ydIsCnn- zZ0BO$Vmd-JH8QcOu@-MreNBCR=sA`N$LdUYiMztr7>Y$~)}M+hdk&~LB-0%pG?$pr zX&={gZ2cVRczydg<)y9^O2bt~>c?-siFwjvZl1Q1iS7P6iMQBekMS)=g9XRBf(9!c)BZlhI{(L3o(87el^qj2dPVe5Kpj{5-OB=K1g`{ni@(2N?7P)X!YJgRn z?V+Boe20s)7-q&b0UOB4jP>=jjY175+mt``K#L1g0ud|n-iPCwEq_9+fx43xS!1aw{U_GsIs8q} zm71a_C24bI8I00+@|NRg4%^Rg+}nyn_ObzwLFapHmiM$^dX7RN?Utfi-(;>jtx0UE zN$V3!{`pI{l;umUllF=?J&!#{%;`Yw{buKq`M?`l%)&3|dfoD$tHK9_OSbF&>x)dm z2kz9oCdxPI-jlOl_WE?R=zLLvpELmA))L(JqE|}8B_)%`F#y)Zm}MPeG3bueUU#dC z$iI&K)l0R)Vpo>UCZUIn#YSs>rX==KqK!nMp5)=`kAuki7EGX|Bv<)?;Oy43am!Ls ztNSOC5B5Ulj@+-+NG6}#rZ$^yxv*p;F8IA?Pd=$vK~y+!u;yISP21VF+W`rK3QIz{ zMQd*eNmf8Q-)&|}qUF5RuXZ?H9^Y#HNu5D{E`-3;iO*y9rCqjq(~qvGn@=U$q~CSk zo_beCEG#O(H!^yK%JtCN9EG(Fy}=f=^nJ&n-RHdFW}N5#7aM6EVl1t|h3UJs9>$(W zv-si9_N0C{%Oe6$G z>3T3WC7-x6PA+tHokoc=@e@RbUZIkJb1u5@s!;P;Zz)Nq;8Io@k%wMDOZO3~S@bxi zNGEDP307hM;!dpp+^$(_;q~(e!z?j}=ZIHRs#_jc?w{cq{nDH>J$mupUaBULe~2o1*X>Sz4G@sZEK8TpcW zZPUBD1LPyIU&2S5A93FudrV+l&~(4+esofib0TSbT-S+mTA`#n@!uAbn1gs@doPkh z17UMb1IGfp<%7;3a9w!Mk&OOhQ2?2-rRfc+g?l!r*G<6cEG;v#=P&p{5(a_za2V%m zoRx%~ri;xEI@eehPxxo;3HybE=pty>gGRe*2WUy_8++}j)(Ks+5$D-A>}~k4*V5u1 z#|pbRNIG?j?3=|OxO*FslBAe6Oz1j1Sn1`;bPwjPU!xJA|5i^R5KBu*$J^A5S>xFK zas+t4t@=`+j};1K>#VySsvuIn4OGuybBg*@4YxF@zfsaTKVKu|ot1&*@M#+pbz18C zyAp8zW2z7^)>4$1-)U{1YrKm*!8Pd(&`N^+prdqRVtneOi*kt3~u^W6~ zsqhR-*28-hV2(I4M}IlJs7 zP))fcr~V;LNTow%GVRUgV7n+8eg_b^$DkL=)-w~vYZ!9=TIu~E^p}>fN#Wi@!yQ@c za#ttXEwhRsV-g>2L?ozqJn>CcxOr_0Ocl{Ep zJ4YPALeV1p9cSiSn)h!@5NJF-MShVLxAIJAzAlG7S7MC)?K0MzxI!Pthq}+(pZ$pb z^ZOc)(_;c| zSuz-*a@#=4OGg)p5VoNAjYdjm?hlbV&dpCwzIsI0DU6NFH|3+L+%3;1Q?=HXWiJkU zq`jTg-X?l{5WQHiBpNGv@SUuohWzH|UO{D~sBH%x)-nQ$|9MLYrTnrt^6#duH!IVE zhJ`YXGx(D&aJF-8O^;~tzJ5aM?tGzqM|ZpTYz-Zig+U`|^R{N8`Of-gkajC(C?(*X z_A}NO7}B;|+bMbZh5kO+3&zhgVia6|oa=V$uWY%V$um@z5gj|0=X3uZ-J^sUiIVVT zo_)L#v%EUOj0M5+r;+ho4!&OMdR_#A%>!K72IFRh?QPIsr;)0HH(XQj2&6-;IYXLh z!TI~r7lm5u;jj6mN*oIHo^!|k1~A^x4H;xj)sF%9LxRJ~HrJD0Fm)ip|=7&vjT_{+E5`xuvex 
zAAGXTwIQ9;*&!#_H2r+@1FI&h?_54+z`;2_UU|5x`H$b=%J`|G3-$4hzb;b;oDP}% zGTHFvEpbPbPwl%YifwcP zg7Y%?1O$(8De)=EFI}FDKPt{ht+2#kS1V01H5ieT6_%X+{RyA3I+!CzmFU0D;lXCR z^TMj(;<>#@PqX&~`0HokuH%-lSF?{k<)v{Zip%hU6xwIpqd3HIUA?N-o8WYTyGOAl zre^2meOz4p&n$_UxiK}hfP{0D*Ywt8_qJDP`lc?2PFvw$XVF_)OBDBctn0A`;O``L zvWM+vxzomMG#H++8HhTrjbcSQ#g~MJ%i~~*f%_!=3q4gGb9ZGH=o)H8>B`lQ zbjI>?mW36Y4(`nucFuH=0m%KXCV_{?e5~vi_p@)WhpHTx2G}>g`!O`o=LlYYj&)rv z$o9)WNm;%QC{-)1$HA|oUEtb(v_U0H6C311D#*%hzt}faNJ7JRWBVeOx+p6l2?*<( zVNx8q^ZDC_>(A2=F=+0_i2jP}&d=|BZwlICuc@pVJS0+9(Q696WHjHElypatSCZCM zCJ$$ezwVXf0J&JAIPRL!;O6o$cTp#}CYXX}43$RXNqq8jI^0!RleDZRV~gKHD*1T9 zU7?@VH%rfHhFr_D7#A!AHt&%~sQR7hmNP%SPs{>ld{gIUbCxc8Vi~?ERW6p#2pCYi z?dzQ%I}QQch%#)Ah(=J+hSPab=B49&YoQA}uTE3Oz+{guLnb=wd4lvNyJkt-{*K9N zI@;8ql9Nw1l7kmq0?LQs18qQsnQIiAsI;5)_m!VQ^(kAY@E5xNdSE#`@HPCA$5K*Q zxfFiSH7g*?VnDvZZf89~qnYZsHj!lSDu&}Va%QsTlS9kfxLFFu#P_4V?9G($-vh48 zengClqj>l{CY1+|LtScKq@=PgtxKe%?bVmHc<^|y{%m2(jmp<=j8PET{Wc%KA_o;A zpXor>DQ*A3hr=)Aa@x;G(>{e<=iEpGOr1;(4wku@^NP(-84hkhL%`6La`^HPO}^>B zaPfOCiHTjIn|ZRKR!5l|!}+D624~Syn^KfANKB5XjLbJC_B|Z0 z>!+=BAJ5=TZZe;-I=1MZQNAy8|C2X-;q}+;8L)sH1VpGl!~CmjA9sI*S8-jE-aW18 zu~GU=0oE`OakMk8!kvwAT?diF;O1g~W^GAiRT*xoVAMnM0>$j;zIifm*B_oTO}8skX#>8(!Kz;UnQE~!t&y>C zFsG~T@@CNAwn~hKe80o3rC40I&jBE98)k(HqP}n#hrP;JM%56n>A*z-ULI@R_?FA^ zTlda%$=Lr=J@_FNM88tn)m80!sTU*+Q^)iwg6tYatW0kzgbDYjDTHwYrX%FEXU*!o z2~xEy9qgS}V0RKRdg9>qQ$+C?hX?>MoEZ{;c&vOS6Ro;FH~Z=L-@-3q8Tdq~um>S_ zgMlfpxiFXRCn^e~62J#|vS&-LVS&wL73DdAKp#o@s5;@dh>v@*{}C z9>NA`X1z~@i%_8!H!Toca|A;*Q0uIqu=9LRMdkoY6Q_YwaeXkgL%I@ z$lU;(HP{M)K-t-F4bm6=GhlKt#%APPX$ek~z`0_CFCj^Z`e+Jg`s0_ZWZC=^mW%9D zfpkIx-Ql(w#^iZhgVhv-7(3o4Qoo0bw!qwEOW}R9L5ubE zp1bgG7B%3JwJAN;IX-wwt+%|S<>k0(&_vE{H?Jrf1$Hi34P<&`_-C2)mU3g6YhIL7 z-?)Oz6ijytviHMy=C1kSY?ab}{)Eh;!D;fYCcdX~7m#CcCP9hvhMuKXzo3t&RNXt; zC(JN}@!oS?R{yudTeK8VzJCrTxd{nf=3ip26T{437D?lHQae$0wu?0?_(UcvG7bVf z>Kww>zh_(V8U*p{%f^#-{ z7YNJ_?F+@kzV_=;Au9@VaGkl}qbAHt$tly%tZ<~>?!;B- z;kryc5KGpRAkF&dnT@ufAVQV>znKg)ewt3Tb8)nQZR+lBr-ub%19JA$6Po+KM^^ku zr&|m9)cj_l#^;Yg8r_Yc=;sBmsL0`G{_GzsQ`ZZ#(W~`m{%7_Q?eJ*$EqGuPb8pOb z@&j3(HVhULZNL_VU{Aq;o@UqwY;v!W&Z-u7mI1mM*F{7Yglu#}md{@AciTXOkQ5wR zsK7hxO$NGF=WL}9%`ac9MAnx8rk<5N@N3Hzc=|gzGY{;?E9~8Iroixz2XX@*ZlkuJ z3R^&N?CW;4*iU>)F}2h>g<{G!pzi2wpc6dM+ z`{myzPO+-CC7ca$I@AYSxP$`^m5YC1YfK#!fx%l z3mZWWUmv?p1RW9m^9dGq4kN}_kC`f{^d6*oPW-hrURmli>2>lM_Y+Ip09YvS6KseY z$kvRlaogWx>B9_tejgQ%r2M<39{Z(I8_ArO2m+D3-@i`k`s?#d&y_XgP?UI9=Q@;H z>HaWLcHafugc~NgsQb>~C%;OQ^7om=4M-IL8lT7|tKMQ$HSVWhUdC85x4Yf%xSTWM z`)oB<^k_#PnoJmyc|M-Q4M-{zYf9ZeeU`Fa1~FQ!GiUOJJ!XYj4?HC9d=9X ziQOr(l*y{sSHID$=18qa^b7k`IOi#qT8ux8gz{1`wH>rU%-vt3C%bKyJp z?|3nRH$g&i4o`*hr$r67P729krgLhOn7kKUDh5>?P6zAj4Kz(iGceU!ou#6?j@XsC zeB-gn0IIWEmD!EPRUJIqy^!0kMn+RcwYJ3z@!J_sjJ{7=-^6>7v|_wpyM`$WXC3sx z!Q0}fUE4C~_e45Q?R1km9o8Fqo9+PIowTCs`s>v=ZQDsw+KbD{E|i?Fzgr?lgv*DJ zgt&y)a^lVq1+A9;=|0T#g(tVL?Rz@u&KYYmI%+x5Y&m6@({o`^PL!Gkf$8Xbcs94; zv4YcZ*m>4T5A*Z|2o_i5XxHX?=M&;%cukEqJLA`f8&{7(pcr${pJKOt2!uguy*b)K zh0xVB(jL%Z&q4BY8%Y}BO4+x3S8P5yz`!UlShwh~xRi?~-COL}e6~oba*my+O2{G2 z)s|{tanE(pNMwI#DHnQU%Oh{7{}|jV!(9#%Tx8{TS~n2&hr^K79UEpGzqo?6)M$Ob zBmtWzON`nh*8?o1w$~4KL3HBNHOGW;ghPL2HcUzW*MIs3Z)FU3b^pK>K{5GFLB$s0ptHm?5FKqIa_&{_AR-haG3Lo+80gU(>P!}0rCKO@t- zwgud*aB^u`GB(}hQkB^-*YRStHS@DI!|GdXTobMQUW&!5vDJR%(V#}=NG=^4%+aQq zZscGM+9q+(o&$@V@?oYnI32gQe~A71sSnd|{gKjI zY<1+Ecg2I#0S$J;fylKPEX!D}Fm9uLYw6sz;8dDHU(agsZf}~~`zhbz(I9=GvaBs4 z#O^|Ja-j6WGQh9qplqS6l~^iWz{L(ypCE6G~7TLRO+jIY_A%Zq#=}W)6_&)qh?lotWXnYnv5; zo2T2B8m=v^m91t&O=Ez*clYdCY`AqUP4+{cHA-!}M`p}+KJ>u!q+>phrgmql<#uhg zvXF$<`(NycF&Ky_*gQQs`#Gi%ShACfp@uq_D-j8t0U*%pW z>}f8+q 
z!T8T_`;F;@1$HpbvthP0WBSAPHMuP;l=#H`_SiYZXxW(aK@J&miWm*?jH_~Mps4Uh z_d7RBV?VEcigaJbSQal@X&p78UAyjRYQt)c7;HeuDp8S6;EV!0*s}3-2%D)$!e1D!PmTK4{+5Jtzh*a-z;G z&u;ZvR&k<^<|uVtyX_Wwn1{9@@r_P6+DtooZuyR+x4^JfFP5_GHQkQo)?)wcAo|EY z4+9sCc~$M3<8ocJ`z!>w%}CDj>9ZRk(&iiL)Sic{wj3J24^$>7vj|U8=gx`OWuP1S zY!0`MUdMp&Znz56j}0#Vy?5Yb*9CcD6>FZ_|IR)`2zp-V78tct7eWF2swlj1um8AlW@n7D26I&yA!=(m|^TdSgSH}c~bS+(HVyZYLqmc@q z+kB)*s}g_7IOzcQ$;PvZK5f-#oG=}A(J8&qfV7Wi z1(i(5jJOpx;<~ z{$7bI^6%|`=hku;WbDwa)Si3^_qD&@jUAm`^x#R@>?sK%jIgcr>z2!HkbNDinxhr% z^yAG{E=nyc@dnZ_X*AO|3dV;&`lB>AWh3_R+yUs6)K!K&^Wp=Rit$6P-mZd%!MxD@ zlsUlW!CyJ%Tvs~UQz~ZBvS_`seWCx5<{Yvoaqx;S2M_FxN2}-bbGc~5#jOnxsV$Uk$_I@{YBkq}ACr(nYrM6XKT7@AYX+t_SlIW^DeE}fQ zlvrBa`ue;7VMh0e*=|=y?CTB9k{eIv2!bHgQp`O; zqj!`F-BsQ17(^#@-NCdwfS<@|>{6ZJTEI=j0fI1Lielcz1ogUppP6k@|01!xw#ltazt4!`X*}baTzWyf^zJQV6HHC@FT!tpV0saKtd6nllUpM5 z*pWOgJd$m3D_kvA0QoYbuEx)aA%2jfzcL=kQb)NdiM}}D8~gpM-$_5Gso7{y3|!gK z$~zsUd#4btuKoKeKLjLm?nJr>Ys-ggMs1!EOI#&b4{%ks zK?6`mU^$UKY^C4(@t!}|qC}&cz8?j3Y=7S!==$@dpbD z(<*_=!NK8oGLP4z%ul3G2mI`m!wFq~rICf!5p6h%4M5nWfy9s3IY^oow#?}cv=&KZ zSpHOjCifWIvHb~d+v-2D!`06R3TisP$@SM*1}?F*(B!hQWttd&uNHe>Jdv{lSnTpg zQLK*lI(n*{)Pk}kDcHp$Gc za^aprzggvF-j1Z%SFns*YPXdu>S`M3(KA2{Nzs=^B&I+?bxTLFiQ(beqz@S%I0zYZ zaiyNMlz*W;c=2$^`n;<`zG31To6cbN zthDIfd+iHtnQj=`LAT^$Qh_2_X;s*A~ZsEa=3%k71M z>{t)Wi6@zhUE-&XChh%Ar2?pD=5ESi`-@dQ{HrZh70BK8w|OU`-ie#rE>-RqCc4-Z z8;yXKfsdQ1y%z;_v4+hw4KBWk6r-1T#JdL9pE!peCiY1bG~FK7y8}aH+qa9mx@re) z=Q`Tw*~ftGxwZjSM*|&&MjF5Fm~FY`7gQGFCu+d+Gkjvw9Muzh67f}?h5Kb2<8Eo@ z!OuS9a&)kJq&HTvl%qNJ0Jvm}n-8zlAlsvUJhKZ0ycnMEL}s|0w5R1t_k#n}YUM+lzI_oO74TCZACB zTvasPas+j+)1dC*SbF$Bag&Hivew1=DkMHfCHy=p8#Mx}6J<&E3c_2;11T@aoC|Y? z<-RU?mvpbCslO@scpdN0#BF<9lMHB=(wjQ{`1VHSTIAyd~;&4Ip9wd1r|2a`0 zf7YA+DAWJizCSvtoW4n#VW$F@ganoC_R-jGWrR1`;sqSR7K-s>aCB)-m(83{99c}2 zCj0d<)I#w`&!YuyQ!&b_G>(s_z@|B6k(LyYn~#=N~RI^49xzL>#+LoZnl zCr9G5nUM+1~DToid?Y|Lf6m}ntCMk&wIOe!KTI@YZaly0#l`i##EKIKl4xdu-WGZHb##ancCb3Is+^j7JFv*~=DyOx+_9bo`TqPtY}>bvFdM$=928)$ggcd`g_ zP0xV08J6c_aM$f~I*vkhBF;EjB16?V*=?Qf+l7kGf~DAjcgJ~cOA}X%+FkS~;y_7~ zWSdRUWm1oF!Vz=-g_DAdVjhjvZDDf@&Aw31?167_(N{1cav z2l(@UtZx7+54jmagjA_YjK#Pl_Dh>^okSV6#}H8Fazt!S>MBW+Z6@Oe03;RzCffBw zwKBnz7|QdLRwu}bS%)TlD%XH~Y0rgr4!v-Gv4~#{qeL*T>>_>Z(@1{-=2dV-;$zc* z-vk-W*eGUj$)~3ZQinQgut?xSpt582!5I=M3ymVw8MD&U7N>)Kb9DE6XTntU1I4EB z7LPh;&19WjupS(2o*x>{jiEx+W32JyB(1%B@fxaxlO?I04JVm*c=Cz)< zvsm>1zv12MJa=IB4nz#op5Q#RP=&@!1`Y z$csr5KO~yInnrun(+x%p#Q*Qx`sG=DZy? 
z-MG%LnCE0ZC3`jAF2w2JSK}d_W!ZPaWbx{a=#RJoki^Dd4rcj}k^l(cGI?~C-bynM`~GvlV6@}nlBoF@{=F~L!+=u?51G9)L!NjPAsNnt z1Q9c@yM#P^7Go%Pux=tO*Mxdab->QKqie(x!W%JXmOo{{u`Jwc`AU|Xb#3YBu0LYi{(OdeCrq8>fUC~EoG<0%fM9u4E zozKZw(ECX-_}nKu;?2vz`)ltjm(8>cy4Yu2e4NwxuhS{nIBy#A5VHv%zat0H(ye;% zF;cRHHfL)4jcIwLMIB6mY}9tg1?jB6ok0j}9^w{M76}2*@+Y42>54e|5HAZd4=s}V z1SQ!;!&VP=H+;+;#GNYOnmkzQ5hSD?@NTW?o>j)C5`aatQIlJl-kP|nzrhR`!9)_wtP#5;|go0k-2|4a~jIJHwTUdho& zCzON#kMxj?nV94WYwQ|ERH*SiFR|(!P?U)~K78M91fqZ=&*AnMUVn>qQOA{${sW*| z_>O-u9G@xjd_8`v;Bod{#KFExgW%zL#wN$dts#hY=+P?JMI@C2F1s$wGt@aNSr;Tp zCftXB%tH}tUp}j39^~IyF1xf#%Mb4=*l5)~;!4ntEh57WV6o1(mB3#PV8ab~d7fw( z6#~@IsyIH6773F3Be6B{G2U3D@`IR-FiFonO;VNLtXl!WS(>h{ujh+tn|KvVG}KUI zL?#4iZ;AAwhi77y<{eLH-4;U-r@qJ^`UJ8)$=F-peHb&BF=yHCE;HDaou8Z zqPh~X@A8&w@yj_x(yakE*eve;q0mbLMJ-31)t6IJ<2J+hje$#VCHs{A%D3XvTT_x} z*izeonWVZl^NE$cXR}Ia@>^uO1qN?`j154K0SV#J0q=p*788|*_Jv=XY60*@i%yO$ z{)H5&qb#v8j5Hp#sviP4DZ1co*Nn6aV3{EX-lv$XVQNm1oS;6Jc*Mu@%>kJ+9=tas zsG(M+CENgk{1?BTc~qSu;2J40*X;3I*w#vk^MG@01WeE@KuuekHp|eYW&~EPgKO$n{QX%r1(-U$T9ZDRj-L9TEvZ6WUtF*KHmT3 zhko1>mqv@{ER)DFGxp}>mp=o;T87B_RUzfH9OWd|xwrlHWdS@O2f_#hj4OckTri^b zLb8F&!8@K(mniFR6<@7oI2vK&wy9G0^lUIX9buy2i9r{u8BJe=y?~Mdu7zYFk9`5Y z?*8l%%5Q%EJI$$_*D^~4=>|ez6 zKPDxC+?S6x0fuB#C>jP13(iMSmYdUVb7c^7dT2ic(VzevFrwS+##@}Mif?u}p7+$J z96-Pfbn#=WX<&&>RFHlqJ0yQ2%(c5`#28m#k$QB#@Y=Wm3HaXoa%?^jFZO;|ZqSC`YoZ&=5TWgc&D~Rl4LUrj791tu2Uu+&05xffDa;{ zR@4DzyJbmsG5ic;;v=f(9gpdO#hkHbR)Ghsg%9VQZOgn~OPTw5-FE#l!M`{um-K9<50aV*dHTui?slm2Um8 z2XqWf*>{^1l;D4O?)keRcW)|i{HO{l=`e9mP8~xr=H|Dj6g}9a+TZ-0f+e1TVN>Hi zvz3GSN`Le0a;N}&dfM|TFY-*6;SrdHU;esd6VA6Dc)emb&1H#CZx5HG|dB=EiZ9ry&{=~D`s zQqB^wCR)QwA7Vjq6~V|GB9?f72l13wyTb0;@tfNx@C=b;2REGQpG$-h=9Ku^RN_mb zvaLw*m2;0-4Nky`2ffdg;(MC0lt#xt(O@)W1k%8qc7Mh}d*-ezb?#&5lR(V+{zdgz zZLp($paw$Lc55)(pP@3c!x8zE!nCv@IJP&Mt;ye;p>&nOn}T{vouSMGCMy@V?D}4a z^Em=Dv1Ek=ky)rAOn!7P2Z|XIoUgqoXos5r1m0&!^-?k~$?jiBJYp#}XrlZxky}?m zQgImW6q9p1=V)Z(<%8t7OW4l&KG64T#`yv1`}mnH5DsBL5+(71&= z{>Uv!^7OS(9sfnViK8A`vD?hcTf{zi%bG0HcExm1ZSjtvZc0b^^hQ%s$c({~c8={V zOv5m%wQ2qt8_Rjr;u==4pm2*LQeR}N^pa`GZ_u~z0}i$y6R94uMrm_6ZjoMc z!AAH&?&G;0rw7)$5{D^A6{08aXQ4@i2dy?bxPFh^3r}Z*p9TNym75^;d8ZbB=+XUf zcuOu?y0>Yr5POfYNwQ{cot|hm)DFc!7iziZAUcyB+)R%NE9a-U8>*nv#CzdcS{3h% z#&V5Z4dWV)i&>6S&2Fc?tK`z@W~h($F${f}HB$x2E!olaZ1k(Xdx7G}QIW|J)AwNb zQ9^0MI~ECoiGkQM=b4;VY|S@e7K|Z3!!C!CM+VwL+c+zye5*8Rj@lyTkM!s~OCs*u zC~}#1VmYD*6#}Sb!pjyemna(o*!XiSs^4U)ImEm4F4CXVPCathP1`I~X)>se<+_^q ztJ+S|XmM(9(+7c` zqdeRK<(IGicbI^+@5Qfbw3id|tp0sisX?mI&3Z;i`7tatbFskR2jbs87lv$ta)bMn zupb~zkP?n`$II2m4UkCHYJ^fTzqxtlL1zBT>-ITJ3DQ!pc9;)isM+?PmCkWxhWW4S zX4o?C-mz&?p%{rUeXs`<6aUhmDDnDS(^hHTV~@44vsuc_6@ z7K=oxr)b&Hg$t^MbYAh(GE6crPyM8;l;g@@R1<7!!Yce{3tf~H^U?UPVEiAyeS;`U zcP?M{`L{>1gfy9v#gA zy%h}@KqH7-xM$%-beegc$U6}@$|mu8;bOG~%f%QfZX^8BqOJ;iQxMNx>`jt9iO9SV zi*&!hL5FM);7QxTHqBeV(qY8}Uij5KSy*W`{a!i-EI#)$?T$FKE22Q+*JuVb_^O2y zC&qax8$m{Afkr(=XA8QnSa>7MWf?5!nYOW-iK$gc($N4HwsUXlkG4c zE#X}mEyZ>^HC^%>l5>wn-UKiU85-%pOyN(!F6)%AyMOLu}cIei?iPF`##AmlJ$)v|{P8 zq$`>CMIDvCto$5s;e=?&9QNcjj1W20Kbe}BAbHcxKU67%D#PbBqvOo^52~l4({Bqs*nm_ z0J-FWl5<@a3!P~IFy&FkpG^@}XUW62W53mZN;e8onSkoj zpB~nJT6Qj@{d+=W5^KR(pK4zbc@+ZIHq&oVNA`8x{tI?)E*P)NfP=6dR)gB0N|i3U zBVOpa`QVAgfcii75+SJA!zbypcs+XX#}iucJH>;qQwH6pY#SO|i^lQA;O z_HO5bdky$y9`pu*guT632CK_BTgu&_Igegp=r(i|QD%a0$aomG7nz~x8FojEQO2)TDVtuhhEzfxDfmH6e~ z#QC3#ORNGawNI6<<$`}E=gDn^!)S7L1qfLbDD>WphGh;|TxGIGtV9 zKF%(X3S0bhCfD-Kh6^Tt*85XFpOKqmtN-Nb@3Imkdi27q0BNe2sax8+Hq=S8ad{|J zEHFoOPoa*^befQ70{VRQs!Nu<%IJA2d&kOcCB>5-6Y?xO9qlxV67DLRo{2raz@`d% z$q+8U?X>c=cQ+fbd45KOe_bRuL#EkB-<&$eW!#ovf5_AKtm1fRbTRT)%!8?|u4Az2 
zXgXMRM4u+m+S^*9j4K^_g>q9Csa|dLt|~KJC`rs`=dp7i9;#ydc_IFz{gsOGr4shr zI^v|J+hAI3@LPIOo@sG6?b))qhG5!Yy+$yZi*QV(egCTE9C6-|8=|Y6))uX-pdgrD zc4(I!|I#q)^IKx{&!%INOyt4zgj%ytUWkP175oXgpRO@?2OqUYmQ26a?J<-0A8`WW`(nOY^h#O@Iq6#%*u@k3 zw#5l>4>i(|eOV2$?#$K)(b<$9#6a!OADbK>pb`$K52a=QYNf&0l@Q5wm+7{B3pxw#_>Wj-5i9E=B*7a@Vmu>Efs#8T~f4ZtbHWnD+1o3x8sg zr-I$%ya(CYW=ARx*4j_*1x@|d#r`uheW_v#H7SF$TcM_ZpUCt?F<`>TE^ql(#b2lD zQgpBKYT+(#*j1c(zmw$fq0qfT*(_6Y}0j5((LR`E{E(=1tteVrY3!^!dc8q+sY@Ag$k zoDAzcmJ>C$QQ6vUTRHkx-NA>@nw{sPPeG-&cR5*qe|vCC8=dG6=+xRMOs!gqNmhdS z)b`5AWVgRHL;YJNLa+044#<3bV@J0oHWWVYB5~|X z#z0Rym-_tNYty8pft!nOAJ*NCrEE8(iGN$H<}B&^*QpMDP1bVukWE`kHH%$*p0FAr zoC+GEUcSHOC4a6RnII^tH^Z3KynnB|*bjm+PhSnGvbqz(=_+csW=K)2Wbl3>BKjPY zJQK&-#Yf3f1f!dtrD35$4X@U}*PP0=#$WcCDXR-e#wDrDjnA}~9-FSI&RNQuh48UC zsU=QStbw#C4%Du6TO^G6EpEq*mzy63FYUn(S{|>MPfZrgxW(~6AcMDKeWuli$(Dep z$VWr=Ze^>@32F;9r+2QrNu|Ge)Oe{SnfH|My}usnoi?&WgbIINC_?@!N6E`|KCF z>eFoV4x05?gC}R}Mf&;$g$;@mcsXZ2@pp|g23CtW2BYFPK*rW7XnqZkTF{aADu3d4 zYUxN7Qpmq_STpNIl!3GB-WcvQ5tuBoIAxgll7X5S^({U%441?uySrS2y&W19S2v@d ztt7x3CAvDp7E3E$^@tL1nd|h8Ke20pBgv6zCfL0?^3izxpH`6Ht7M{Fg8(N`4WupF363LpysLjRJx!^>{(E+q|*{4 zHzRrs@q=`zJI;YN9vxQ`lf07Q{YrK=44*T3{c|uYHV?Fgj30N+L$VV@|DpM6;&;+d zAHu~>shvO9)wRP&yjOlawmM2}n)Gx@$1cY?O;2Y`Ijeeytu`xb(FAJ;$-BG@o)_l*R_XGEHc~%mUe=j-2oB^Z!1TgK{zi)d^J%*a4 zx@4MQMOotgn?3Q;C#XRxjLJ(O3=Wik(Cd~b;Bn;Y5n1Nx%NGZdCvj&!#>6cP`pxpb zP#QbyPUZy2xavP}a3m(t@$&i5KhA}oQPGZS7Mdy$FiTs>L35jVUbrt|iz-_P;@`o? zP@iRYg4){0n?KB9OwxWO*ZMZnq<*YOa>G(Md7QEwHvs8txg9=M;kZUmn>K~K2T#vM zfZ{V%SDv_ti*~noU9wNVDf-zmugF{WbM~ew9H}3V`xd{AH=5rIQvglHO_!?K&tD(@ z&yoY?B@e*a!VR??;@{V?*InXiViap30OY5I!}@%hQDR@+lC`G9%e2oKPd*j?Rzk*S z&#|w^qXu7x6EKzJVWGy)N7CMUYk|ZDOY7j&l_)Nt5XqK!MtJg5s625`2|GQHk(aRF zEOjvG;xR$=i)1p{IeyDEDek@1w74YL%RDST?UfY*HfdV8BrC}$CoS?G+CQ&0xsriGyak-zWVnN1k4t_rpjIZ3(jftFJ2$YVr!= zP)k4rqId!VI<<%#o&g1G3FkCMuGSKhK%hjq0zpK=5ELngiimg=P!dOuX`oTVDI_pK zX)%Bu1my|_3T;G=Dp*9K4bkpL5Cd(=e{bf?yldaSx4XaJu1TlvKS0Lok0b5+Y-4n3z>Q? 
zDS|KB?-?ql*Zwil^+Sxd-bnCumESth4473`v$@0-5=j2Ok!%DJEADoG&ymc$__B$}b>{V1+-9!iwG~u0d zV1OG`@Yk3yctSw_c(wFr5liv8CJM3^*_ zZgFsDKA$;EIrD(n=iP%Fxl=DP{d_Na$R*R}*rNbowAl7ich3h^(Vx)L&R6AVa&a> z`Xfj8!{tPQK+r^B;AZ9OtM1I4jbF@x$5z;AG<}h8nh<9!<-qd~U_e#n_`!qO3Gp&- z_mdAZX0J<|jisUOX5S2#Qpw?EGgxVN-(X)e;7Go6Dk7P-Hy9Pywc&%m$VnR9vwT+5 zCyI^UOf{>IS}>OjYU;Y0 z$#ZV5d#m}tB5E-RX8+E!2#M z_;11%uuut;TQqgn1th$e_piOKw^a2Wg%z6bKz_RX4V;B(&A*TwztPVhy&0{FkmTaG z2|JLY+8N{Q(tS!!_#q)P#?ej!s2fBbpMM+*;eAI~f;<8x?mkj>A*iSPh(>Te0CS&# zi^~>pSLSxh)r4VX@Q-4g7+st2Qrsf!bPXy{NJ;K2XsuaA<7FrHcvsQiN*Smb8)l@1f2Ou5=dNu*l3e;_!NXl~?G(UN;?t^Xatgboe4mcrOTclx2Nrv6-i6Pdn`0!K<6x R8Gl|4emGYT7oJl<(m#&Dd}ROt literal 0 HcmV?d00001 diff --git a/docs/source/assets/kernel/query.png b/docs/source/assets/kernel/query.png new file mode 100644 index 0000000000000000000000000000000000000000..e2d15ebbfe26ec00d2d57581a8709f9f2ba69369 GIT binary patch literal 32710 zcmeFYcUV)~_AX3EKtKgVIsyWM6sgitq)UVl6;GOw(fI(_dDnRd!L)hM4gc410#fUL5xK1-c1=?002OGKDE|cwP%67PNKa*s6HiF5=3TO*X!~8I1EK z+w;au16&e0n{yh7d>IikpkHmXMB{MY)qV zR4)l1r-rhgfxq`d?7rehI-xrGk1Vks2LzNc9zP%02rkKfSTy@MhRdpb8fYwpL(M+g z-%Z1j>%ra1J)`!R0cNAJjR~tAzGNanvi-r`zB6esW;>>~q*SA&|0#i27)< zuZExaG}Z(#a?mBPx>9qXP&kPs_uZN=?s~#QGO+X{y0T}gKg$MHZ<2MQJ0%pEZ;5uj za(2DoAUkXQ8rif_`-oFQb$dYfd)K0(W)1g@fOWiG>SCmsst~_J{8>$L%RwFYTN%ag zy59%XBcfB6%{-3ERQolfu4Eo8@@xzFHRc3=;P~ReQwg`;p_@XK8}{n<_Nhi()ho-8 z(}$~kON~rrDlO9BK0u})MChEVdj(QAxXwo}l^c?N*Zdw8SK)HigLOWt>X$b{k={w4%RjT@ z*noV0s$P`wmgY;zc8GJvR;cEXCz+5$5p|llP9|QD*qV`H;&>n;Yo^a zdBRCDidSt5+>Ur$Uya>y&2T5ay8Oa93JLy&cQy3n8TBhx>Bn!^-xK8#(gTlodL$b3pD9gXuQTJU<%cTy{M zGY;wq<@jrKHH>#3tG(yRqi}jC`958R^5DkVyRTO3w{CWjF2$03e{4mNa>@k=Uk)jrOZnt=7dxE@gMd$$Q#5tP`rqstG4=YzI5zA4?yLHL#$V!`tqA6g;3c zW96qvjx>E`8aESD+FAdz**&PyN}9Pc6#6}2L18~IkfId_P6fUKCIaIl;;{0pR7Y2i z3BZq}A3ieD;J!V2O_R#?TIO}7i0;QWISltda&z!=y0Y!EH(aZ{j*7_TpozcR^_p5$ z?M^vcV*IAcnkuELTbFGO?o^_5`aRA{j^y~b_X6<&T`YG?sL4o^g_79{o%0y8>U4IW z&g&HGQI&{`W~M33sgxIF=E@cXYv=G9^X`9qqsIHOJCQ!omHTtfb*Byy3Kv;wO4yPgi*k6O(z9c_G@Zxzc5i46AiX^t-f;D|K=`Rqf#L;U$ye^2tkb$ zX?sa8szdJD^fRZlevy8Wp*o+D&4RfUv7Czh-Ew{%0li8?csWM(b^c=Ehk|OwT-Buy zoJu2)yc$*?^~t2IiCR9tUpHG%VDi>PxqkOW+w{%p#cB1rljoLW%;Q62B!eF|w?`!k z4|S4@x60E?U>YZNyIIWU1G-veP>sFld9Nh;s5Ui+#F}Yih4bn2&~s0;8d?+m6kqo> z_vFM>IdM+<+8Etv&@IrdxRjK%?OK1q+r+Yg0y7?-Mza{l2S@+B(nU7SD zm=~%S7;3<66svz4t(c6NAWekJ70L`v#f{-L_iG}cQ`RF^hhyViVspNb9egQ$7%Obd zm(SO2n{S)&c;y(%SVV!L;AKo>vXC;7jAT4{Fz=Hi)#q)pHM)A#KGkl#YPKf5ZoM48 zP}8$Qk^Ys!rqaREyS@3mvBF(u<9w)Zim!90XvT8aVAp9^bQfRviNibl?ew8FtA(#8 zMMvxVwtk#1G0Rg+`FrgrB?taLUDo-I=J(LiHvOtAj6Yirs*xYgL{6jK=Nfw&%Uj>M zg}7zA6+%iZKU$_DY{2+fTdWB-8>cYj4sLnqU2PL1ZL z!@pv$_B| zmU5R)Iw=SC(`$n?v1Wx$gkG?<)P7Tuz2U&VvyDS7zAU~4fd@$;8t+t#R8M;;R}A;V zsr({fH^K?bzEXT8%Z9qAyK67s=D8`OexSmmNw0FOGM^@$@-tB<{y?XfCON{gkf^{Q zFE;OO0h0+%FfK;f{Wd{`6z# z4woe7-T9yOE6=LcJS;pAsRHT-r{M8iH(n0h)vn_{S z#%fbLo{)oc^J&=l4*eA0Y>-XdNoF%?Mr2N;mRtn5*_2Lu%E(SfzlIhn4%!TY71uhIc}+xQQMvH#@{z+$u>K2q zn63le=Y?V!oEiOc=$$9?sfz|qTRpX14yfsybutfV19lBi+sFNfU)KaIe!<-7)?nmvvS_WH=6 zFQFB)nLe;msS?y~Mia=cN}8&T~ebqb;2Gt51$bm+&+KiKD4b zwWNnYaZ$oJ0j9X`%t+*~lz?)k37ACOa5|=QRNQc+9&mTqVx{Iw<0AOa%Lo0h?=WVe zd`%5rfqy(^!J5xuN9C_0-Oerb>xifw(>rjv?kdKB1a98BeB-L>aQp)LpK#JM z@;1_VB5C91!f$2kW^Kp+(#8F<9~|kIlE9&howpV1OBZKXFUglOY=2!L2^?SkEx^Y5 z*CpOBWY~-}v{@D0JndM;_yzd|*9xMIb9QY-}=IHJ1E-4@Y0)hBJ z!u)QY4g&WjBqRg`g#?6z_<$?;y!>3ftzPoEda?hjli&S3w)3*_baMB0a&u+9?AOZL z&Bt4YjqUPAe_sC@r`=1Zf9~Y!_4i`|4=8YXM&LfbpunHLfu_=z|4M2*y|i;Se(dA| z$PBoLtdNk1^k3KiZ)g6wZ z@dL5g9WQ3!8&CD|L%o-{+jAsGOiSp_eXb;MX@kO8aS{i|I8XV4Q7owo_vR#pa-tgF 
z!$<{2Q}u+&l01D=hKHZDQ97fZdc4)klkr8O^tY{}jI2o_CRee6oIxGJUJkoLC-Vrm z)%g>zegR?7xP!%j7nc`guU9w-Fa0VBF2OZ=k^osER<2`^A^5nR9Jfa~fG5=*KSFUMiQ~alK{?)lC z!Ih#>9BZ-vHe7l6x03&Dq*q3-v1aMuznl7R!v%!0`QJwR|Ag}YC*uFpHHGQ+uXb9? z6an~!kaUu4(0NLJl1T}BY}9M{EIQmEuA@13r;Yq%-`uO$|ExNCYSye_)>~137RTq? zf^PHDhrPlBDOeO7E6m{9m~HIZwBpasuuT>Ei1eOG-H+}135(T!slDg}XusrE3Knw$ zA97H-wyqSKpd~wH7P#MTkGVJ-T7+L5^b}aagD+41x#K1+puTV#b++F#Aj9w8!v{@S zij$jfIT}{44sxsM;#~bkCLm(!Qm^kU$#QL-FO&~DJ1mXbt|i~Y6c=U+p;0GpFJ-`Q zokj{$AHF6!OXw{rt$4oLpTUscK}-;jC(wGDBe7YO+p9n+L)~X$pK8~u%9UXoD|2k* zg*jVGhhMA+*!O>u7`oUC+6!1c->MW3LLJ8x(0Dx7%FmO@SCr3xm#>)l_o%b(;SuO{ z>*y>~{cWq7)fRx^iV-uv%xXGYO)q7J(qD8$)7c~$Sx1!g+@}ilB`f-H*0h%S;-&cW zL5mmV_4F14mJiM8(LZV7f+uqzh~HK@^qVn(Uf}JHT7rI=6Em-GBwTs@2A{hnp%(66 z39VQ;ecJIfMzNHVN++MS$FCGo7lr&F{o8PS)C5f0Mr8EWzo$)J{0d{>udntOr~B0k zOhUHrjH{l7rb_#HI{O>bA6uucDs++@*~_Xl^6c}CEzhyC)84@&0t89~QWL((y2t>>Gome})TW6+$B%}CUR)nT8wc$^&O zWr`vmk0SrwgRs9XjpF6Z9^GIA|81EDeeet|gX*WiSaj}^fxF_v1h>E?hQ&H4B6x<; z{R{4DX25PllHWlWSDzAt3w4mM!0Py$Wv20m*KVPk@{&PY0=ygj_M(#fW{rY@;3#^k zE!AM=x|W_!2T0J(R?5{F@p(Z8=c-U+uRc+m^uxoo@d&CY(*0Zh_+gWJk=D zOk0t>6}My~sypak==G@`Ov;|G=ejdg!ZH4*^REF#mMS6B7q@j-<%5~bXr%8E>wPpN z6mf;4kAKZ^nqYpg$BC?EfGip}#b%ssnEjSNxJz%5Bo4@7rOQB-pO4dwJ-hC-?Is>s ze0;uQ&X@HQHePbrozlH~wgwS*YdtIYaP}n>Ul*oDO8FCvJw<8DBF**Bz`wo{EBP6k zYi+&iLMUDDIGp5yL(ek@K1qxe1sYoT+0|msx6%_0Oo}VodTcBw!h~H@`H!+cj_Bc-u-qGBK*$;C{+J(DTLRng64^jcO4a_zQJ?j zWFo7Zz3 zI{?lt(3xLx)|&`KB4ql8l0RWIfM2Kg&TOvf3%GcM3C zM0nOCB9?39K@+t)%&0OKQ6co0p=HcLiI<{D4epjB9Ya}>(K28*Nie^{<@7!wyk~2w z=3EJ*Zt|5FX;H~u{A&B+i%9Wr9XprojA53M^a^sZ880)g#!_7zRGFu>celxA=jf~a zGjkFI-XOMrkX05do|YWid(^S8!;p}!CjK)@%ETbXA=st{k+Mo(Mic$35VmhC-!2<* z1F#JI#XZeFB6{o)`qG4r6q{I8^9i^2N6=gk6yEy6E^JTJT#I~+f2sncll&Cl)!=~P zyj}n``fxmlFSSBmx8BC5v*KAQeXe=)W=W||Tpu5VahgA3wq&SkswPV8qs@0p@6RXm z0gE?l@OWn2;6dJHq#R9o@?C@33b}q=Hog#=U)qB_sTX@8WJR`w`E}*CZ3i2L5DPDa zi54!O++FR31A~D<*`}pRi+B4i$Hz|5VX@}8QFS3k7AS)SQ3mayrU!hdpUTK2Y4Ztf zIG?0wCud(yD2`}T<#R{^Qr0E?iNYVisRG!m_}y=Mzd>l0Apw8Wa(t216VSa0wPu@n z(N7xbVtfzQK6&x%Zi8@XMCWHJzVpyy;&Bt}h+JfzR&_E5=*LDK~MHvg` zvUd+ko<-9e%sI^UxxvnguPi$h>=YWpL{5Ak_6D|%eC6ht{^KrbK9=xZy1 zCQv;b3YnzX+A$SXvShtZLXeXv`^Dki6f|4RSKkb8`&GcqG1N={zA&+ z3C}-ji7gwL3JgfYx7e9r2b)}?VVwBV$1)5Uw(2lH7X|aAraUPWy2BKVD(9Di4)}+Y zx=torDBG1H0VOU`)-BYG87tpk>g??H=}oy`%|%DeIHW66!R(ZBHSY|Urca9qG5V#9 zXx&yvdk|59ch~>mo0fl#>Q^b~r-l^}ENK?nu(;RLTF7op@UXD)yG+|?L z?@w$~r~ifWebI+ngM(x^U=(uN4~g_$!0q^f-lD5avF0F@6KG)yKTZ!FTiIi?bq6!~&b0*UiZ=y7&AmqahbH0f7HQ z8!t8@+;@`wN_#4-J0&=63H64>R#-swpt;kz7aJSIa(70!0`qv-8@bIZxE_uCo}^x01)AxW za^}p6m!{<$NIVZ*d`qj;$&ZgcHP58Er$1r2**ciPq8B&YSbO;TKB+HkgPunvHHrGu zNius@e8G2-Kc+zz2>*~0)zrTuw?THi;I^6lP8Lfq2nMEk(E);o)_vg8i`UXRjd36A zKyy90c!I)Bv^5mPcy@Wf(+h%5=ET!>#0PohVw{DiRnnD4Q#PWHs8#DlTxS~{r`Y49 zg&G{oYovDSMsj@6A!?;P{7J>1SS}6@EY%>hpE8X+eCatM)BDNb-KMo1yRpe_F1eyb zH!Y+fZ%PhQpmE`qV>-sazGF;3Q61Adhcca&7C-7K;>Jxhm;wgETiG(q_Ocw2@hF-1`K8J~k z)MXw$PAc+u)FU^3`xu55Z-l;Yoaiz%TB1@@U$q$}R z5AP8`qV|pvKYn#_L7wy#Ag-JNmc4*KL2QUplbdC_HB)mR1}ouO_}H-Ht8utjXV~Uf zzYKqKKBzz$!)H>u4Xn0uewa~#XK;~tR=o2s{Syo0E-*=RD|_(4Yx=HpUPGVKk4ZHu{CuS23picW>1{{%%bBu7I(k8@(){Dw=c+Bmoj-hwa6s>7+X=Xf-b^#Y z>H_A~PsZQKoXn0)cC@)S1l@*BxxvxjYm^@)?G+rER9h|$^1;NQWcc^$ed_!qs zq0QUYQhjHO4KuF7u)WrR#)hCr?URMqg`jzo&k+(g`k}a7hdq`UAs=Igt0Hjpj$50b zn7bXP=BHbH&}U!qY607!1aYbAp4^Si8L0V)0+4X76aCqm9M)+Y7Lemq_{lTU7%=U* z1IFx9+f}ct$#)i=!H9;?+1?B(sCE5L!%Y7qm*}^RV-lKL0OlFo8)2X_v8t3TZZ5Wk zXx~2$dQ`1Yy{9h3BX9@xCb9_t9p3=k8P4$hCkqz#0-vXY$eXvJVZVcc0d;Wd-b`MU zGG<`P$9Mv-EU{E(^myFf+(iP!o}a4cI^QPvv?Afy^UNZCSJ+O2(i$m)JIUG%DEm#% 
z)~EGW-CFy;$bL$(M#pU_quvQP2D!W&4gX|69TK@QQEA!k`Q%T4d#y+ztNN5o{g3us zsWX)qSZYZ27(#e-GSQxDf!58L{8`?>qnm^G6Pm#uhi;9FVU9`Xv)CmT?5zOf7VMiy z?3?Iu7SZQ=S*|Yes%OPN5E@9GwO_bseBMBwHusmabQxv`hS8p^ozOw4_&qt`C9_o#TLTU_*F z6Hk05%xR0Hc#`#eX)Oaioaus&b*FXN{3E|t%9SLDovv5j^u50-V8c*{pQtZ%SB@tE zKyEUPKzw0+ralAm43cSDKGxMrwlp;JP@JUs*!+4JaS5rHIZY3mXpS*r^cx52b1(@i z^215-sP`u%G&|yFyKFL|IfWbXs^Fl3m_hWxAMG;QvL?u_-XX4WAczEos3>j>ROW~gpTjVMf%jYuEsG+2u**t2-u-1-=deK$)W=S zi>#BNQ*w+e2GDtiib|!uY=!VJ^GS}w;(=YQ&Rl+P>h+?e(U_Q-aJk;i=DRMY{rC)~|vF(Xi0Q>Oz+CbKeUdFxId`BO#@-eXUjV$aW=M>q$qwdq2 zUw7ioE%3YrguG4na>%z1ykja*JWqp zgpgoOxZ=g57*Vk$L?b5R`;k`+lxN>g8#6uus!BA0w1AUbn zJXPX4#YNn91_WRh?bS{}OpyL$NtSbSn&(oa?u~gNO>mcT(9w``IF^`WE8SER=?Nr2 z7CDcCyhcRz=Jg!v=jskalZJeKEOf5D|Hp0)rr_WvcC;AL|M3~uaYcvo3HdL=gkwrS z%!{g>(;ZqGd@|$Os(oP=0EFH=^fP7G)20aRorpUD8t)?LbA!UeZJYr&{xjC4x)*b7ppc3F`}Erl|PL z8|tP?T4lC(uo1N7C@37w_hKmN$-s-sze}3x1aR)Mfw(y#g-dJX$ zX7i>RXq_w07&6vaR#6gfS6J~XekFgP`SpCh?Q8fdB=Dfi)%QWKsc&_R$o|*XS`K7m z#@mBW$9jn~`SeXYs5?5)Y#*Pspqi^An;9(He#cV|yqQAU!J~6JHBlTg`teu4HMPL9 zyA8CG8!Iv{3bST~cBM*|bTTxE()1`t(Ygl1ClfB}hzZ-EE|bdC4Zm%NzBK*)qxT1V z>^{c%ACCzlGh2-t-L3Ay{I+cve(ZmhlY8Jc#zb573nt`ss85+NM^UmRB-6f@q{>jD z=_upVw!%e7kTB=jgxrxbw_bap0X(GVkk)aKVk&Zc4{zATa3x{9JOi%L@lQUKhw$ay z&!2NR{zNB=)K%o(t6Xwtsz+?L$;vY+_Reh?Q-OzlF+0cY7syJmXv4#)$qwVgxxL^C zzMB*->}Ayt$!ri_)&|CQiN6RtVp^`4Zfy~E_)k?^)RZTGdBK7>>j(9A^H!Q2zC~8| zE}v#C)sPP;3B(xG%$d^7$QUb_4o79M_^;o#VYuH1WN;W)VcFy&)m0W2d;#Rh_(KdD zmCkapKZ3jMt@;DJ}PPQjMbA#;WJwaiI z{;LD2V$Y~E1oT19tejp{$9I`*5QVnRg5#5{km|3r!q2|o)B*;~z9QjMwpT{K*E#s% z8sm=FPW^+}65iG58ziv$3_8Vl7tf835H zuFr`V8acs$!VuV4YP*Tex+~Q73iaHYL9|}xVjt*<>&OAFpzqyv_FiZ!vfS6caA&4r zk3f*Yt;J&u&F#M2{H&yzRror0b;zq+QpRb}YD#HkYx8&IN8|r!=(Y8Gz@0PnGa1Q$ z-&)%wlq>TkTcEO1IewMER|{F)UB7Xhbm3}l`0Mt#yUq?~EQIk_9<1m*6{lmF=qoB(;Q0b3Z`WMj2+5vLn)0({$ z|1OmOdCm4^-rVBVGRyBd`3EHZIhqF4B7f)m_Wx-6zx?dJyk`533g+L$@TY^{N&5dc zp#MuH^!bK%s)&;gKlJNsV(}J>%%GL2IH7S}6wg8M^83!p!dTU_7h)@2rN_qJ$PzkYp_zCI$F%{7aMgB zYve?JG#5sWm#1@8nm~Wpm0zq#RIst=W-i@TkhmM?qW+LF{{y=+(J^b$ax8 zTFdegN1gctM$25((800# zmlq71dR*I%fJm=>(sYq44OTjPUUBxk?z4>_gEKlZvk;d{D}RBkzL9^qdTUgG8ktO% z_S``QE3LoxGV{>5URp0*8J&+YC}ly88-trN%iv=N{nEcp1gl2KmSqKaWbQ;f$Ut zeJd9N0n;xtspK=&zS5SoSQfn{qa_k(`Sr+sL_?ggEu50lz}#o7V2cSUj+SE%QaMNUzNeej?SVPodv8hQx%pF7wn>aoplJZPh6yLXS;E12h!RUpP8Eafb-n>UJ zJb!{W7`~6{!%}!)G_aXJxV3W7&eaa318~1?wkFNi8^{J7zN{M5E{ymRq9^t1UcV5q zr+-$rAALPz7VR=B+z-AIb^maZW}hUn+NP5TCL_BHEKQ`QWerg#WaGbcD5P}jDRB}0DDI{_bR0JrK30Kpns`xP z;DLrs*3TaowfrSQxAMK5bjTW`7Fwgy(r-G=Hns4o*_o)&^2JQ%p&Ps1`+DaQfqaOh zPsx<`h=z-~PzRCWx5XRk_pF@p8_rh6;dH^i*l2pu?#b>2SuyEem}_S8*7B6$hZFW~ zO{oH<*tH%3xJYH|dC3L>4mR2gBWJKbN=_<*JMnCc(BH*}zLiE$mCpbx=x1;dyyDoF z@^G{?ljp^z8+r|~eZ!vOw_|zF|78#Aom^LJhUGVKI&mWg1U=SzTIZV_I%ArNcAU#R zub}pj8?`hHs0sJm)YnQBMT*1FU)u$o-*dp+w?2&whp6!i={d9-Ukwc-q#k6D@%OIF zjCstU!G2qt2zS$sPAx*(o&*rHZOWgBj&(2s&kXBpUF|0x(&SSGcvGP1c>0o$dQ zxi%bmN{E}NSth}#`!*z}6F?B=?cS&&iwY@99E|Xcu<=bXOP-68gWml`MIn z*PBIxKh)arMiF6MM%nUs#>kU+b^iO|Ouj}bx_CTvL8K+`uW>g6 z-K^hvj41&IS-K%e`NxxGjSw_qcIs~IR4ip+>wB~!JTFq)=M!U3SCCJVKL~wGUM9r_dW;jB0)hK>>G@n1o2l#Wmu5)$y{Ir{ge}jTAB!id*j$26@tbXXy zixG@=gUrR z;bmjgD&v*_`KQb1Hzp~Q7s~p!CtjjxOb|Z-?-oZw@8v7Y7Q;tFg~;;85EkS`c}Ruv z$g7D>?|%gN5rk1NJqK!0*Lnnz;vHYZ@yGxHn?0~CFOY}vJ3ik{A^eSC>hS91J~piX zXrx&lIBlHP^#guQ&@OA^aEbAZ@ zpwpS!_AEO$7S6NHYn*BSQq<6XpsQ@Jo(6{4=<^x!{Mn{X74*(*%}yBm3q$mstYPx{ zaGSDg3lvz=8=N0iLe5QdzOj|k;5E1DfrEz?DgM+Ef8201e62E$Nr{!`C7ea>eUk9% zZS+|Brs1XF!25Q}K#H&kzB6h!vvboOc~UzdE9{WrW6%gQ_yi^?s)H?02Hz%ZWEHTj zeqSsJTQSRKYXy#3OV1h5{&$fzF7zNFU?-ybx=F*FzXym`)oC_ 
zDtc!!XD1S$J!_bzz4u{*X$aYYFIg?x7g*^t#XSxGJa8oE7o^*e+MA2z>sb#a;?g)hNfYVn&6Jrr+L{pfPAC1+{lPK|K{spG`4-n^W#!kGL%=gC z3fw*j|L0lMa4W#3#a#{l3eNy8kW$RBTAkdH5=qs7JzyyM)~j9J$f$X;*rGQ^3e%Aj zmT;)~q}(5s_|whr8sQbzYb4jSBe?%9JMusAKd?B50TJiH1Zn=*=-&(W-(r+^fHWRc zNB_yG{4?wS9Blw$%dmM$q`U&&?%OcHm$uM}IBF|w)d=E!=95S7?A&;U?Yq)=zUB5h z+4G$KpE|*n|J{6B8-i*1Ol*VCM<8M0Hwhk&gim(5<`tL!(m5~a2_Zjv^`+=(Ztwa~ zZrig<@Z^7+c>6}^1)!1_zGYJ=yb8>@gwuh}cLep)KjXVXtp;fKBO2mVCv9MT0Q4u0;D2CNd-x)!mX1+xXXLxW;b8=DXsayk*PM5vL=&d1+s=zOnL>lL4}; zRFQ9=cm@<9{nZu@2js+)ErXlr#9cp~0c?m~pfu!qJnJ=(IW2?wD*f)m0us=T{-&rCFQOEvNftj`?Gsk#tJf2Wx&AM-n+BLksPV(`Jq=)OxNX+%*%MCd1Ex#FROpVm zWsIS)eXpRF^!^(;T?NCfx(~XA}OMl%KW` zc`7gDwy+1rwf1kZg_Tv`NbNIx#_t2Pv!qLU($@4-C^T;DQ4VJOhIQ}^8dbn#unyLJ0m{?b9mC}obctCnJ=3`S+p{gaNlF|Wc|f*V(3omfg$skOl`7?7(s0L1$o z$azu{Oqh9TxOXyv-mIB3aJW5C7ZapwfQq(Qe>HS8ME9ZJbmyriL}TXt)|hI$c}gb_ z6Yoc!zmMc|2bg*38K8Ly;RJwc7}o@lu!Pq@-O)e+AdQ5I^Rjro8)RWW>pcQo0;zw0WU(09j_#G{XqnPwYDh5FOJ z;VL)pjR00Vlsa^)0@i4SGuR^d#&7iWjM!dY9>0PCdp(h~awL!T_4Ly>pPOI><)JPww->`TiVJXvhoOlH=T zj;n8}brWy8k*eWx7Q1et!D8G-9~c<4MUu@tBdW#O412w(Ml-QM`Zq0tiYshJTR=(D zVIV_lf^YbGTf-QlB{a`h@#Vp4IPy7J;OSy`HJ9!-?VYNxZ=R*QG(;#K0pzyTX$Ic2 z8x2uKllNz8A9uwS)z(&WE$S&5gXfOlkRqXGNHkzPuX<0^T7rE#=YdtixGue|g#ao} zJwEOdo;^qf3K4^cIH5^oi~-x#)rXb+?Z@E`Jtu8M*X6TB@yz7vI)+^G0U0#oPs2O( z1#TU8R8ERym;!nG7tomsCLOS;{^hO_u}F4jGOY*Ay{v7=!d43h)Qzl8yE2EB0KCn?xD{~QEqIcjDe>7os;4gGn(wY zrm@^rjA9ISy@`LIiA@mXlnI5Tb3*7QI-X+Pq^PDrOl=@mRu>0nJN0# zeYVp>2$pdY#{U`s_)6(dmr^lu8kng<`CL1He~whzAOl(R8c_e+IiE=`J^TzQP3isb z3jn9_O5uu(;pWSFNT1luJ>T6izv&gX6Syj+A+^RRiJD~!*;0)uqJm?eWWHr=** zG$#loDY>f-Ms>B?eV;+j;Yg(_f~Sgn7RyZy`3Yb_YXl`0YXHzB zXq|RT-@5`nC`VrI%dF)zWr=^1hhnYcP|iZCLng^3xAM~0g$6$sjjyW&C{A}SaLZ*6 z>c~;Ac1zNK?J+m4B$-O#W;jr5!82q%_izAPQBxiSYjLrVW#Ov&OIfPX>8 z?_`V>FDF3y;NF3Xj8AQ*-R%8iZ*=`!_hZ_7g_-^(Jxb2&C+HWjx_5nBb(C|y=s~BI z&V6d-1;86n9s}g5^^2d_TL+y!4psCummI`iyPYW0fUUp}$h^bc^33my)*PLo=?mHJG(DTUr zc9sq}V%7ESR)N6B;Ibw%TyPP~1JWw)TZ@n~wB2Za669#u%IEN>z)}a&R;7HmK^CY5 z$NIy>D;HUJDOM3%Art^1Z|WS@rh*jb??W1YL1+Id4x1Pv_5Ef)4$k zz8r5bnNKvm_+lR}GqjS0hi^Pl<#$-QQ*2dbGMpgg2Zc0b27kA|Bl9q!LDMrW2=s7F zJ`&zSA9TbJ@|;(lGT{z^p!cm2yHF-f%1_Napz{V7?t)YNFJ=# z4M(+J)Anet4{tJEl-wsv>_B!fxIG36i-O=&pdJ`z&T~2vzqFbDRQ2h;fAC|TTk+$a zxxsIE*5hY`$JZ{sE^15;BgH-1{?J6{9^{@jm-O>=`eh4^dh_P7-+HTE8|MX3jHgz1|-%#tp^s$!y%)$idiG2 zIyVXiTs48TVm_kcyfn<}GqDEJ7g%;tn;DxBS2Y#R>CSkzF*%S9T7RO%X=1p9Yj^+sPuOiy!CV`0$SBg^AEor1 zj^pxy2tFSza;NMfEvJIDeh0D%hR>nI=U-EwGSQ-ezAA?VR>Q2_NA-*+Oz;?4$ho7Asj}{}>|b zge{Q2d^=O6KbeQ!W`U?MN7isiCQG9>)VvE|51K7<=WK8(3e8EAb7_8BAE1DdAEE{) zg#48w)y07-k#mrY#!vX5&XV|?x2`iCR)!mS5x%&mfPMn<-0UDb5Sl>LHLJ+yA40VE zdp?hBR`yQ?i$_VWm1-9HkgQ5wV#m3=Sww+YNg;`ft=DzqpaGPqI-@9}v1d4>p*?n-m()}nle%|1_^uzCN) zv^>Xk_56X$OpFC&vw2Leuq0bYJdD$gz-O(+b*n(4uaSyAZzPs?pw3_%@uH%M^Ke?; z#j5Ku%19@#bs$3<%mt)Mq}-Yg6Dq>7S9l9gfx z2~oRjK%lqjMVhRJNkO8lM%CZU(LPl`Ms3IMj{aKP=uF`ki3e}mAS`zQYO`1RJJFUH zz~QE4GSW$UJJtcjt$r*e`_5aB=~(O=QryHm=QFwFCQQKwmG(wOh#J=z;_PtEP_GI;tXlYM^ zoE~O)o*~C;9WaX-%@wb1)$1HIo1sSIZ08c^Vwc6m&O6uQwp{0$yw;Rp6OEbNhOK%X z7!94FN71ZzA68xlKbN}zP}S97 zEDAvkl-{go@dWs9-@ejOXXhhs~Zn`D%@=zxp|-EwJW@ zo}8O=f-5VYx_Ivp3+zV4VnzDWKH}g=Qe5r^+?gZ0ps~Sx*)Qy=p*98$K^_-P*ndFP zSdqP$F>v`jU6s#vV~gL}@Nv`QyJ`k@hRaE&U3W-&O77jA>$w90=tS{{;8q*Rl*4N8 zC*ieE5^JFTPXjF$@P0E$>Ka-aK60@tHE>v@mpKxGi?5@0cR(vsRe#h9h%s8y*MO>Y z=jY?`a#Pq=z|s0*MRF{F8frX!ws2B5>E8cV=T_)ef4EOP*v3dNLmNZ zX>#D!fJ?D#Pubse{fsmKL4d4m>R7}@9;QU^&KM#~?Pc%%&PJrqP>$k57>6&g)IGQM zfN&xue(xCyhW)6ksTzdZfku>9+Fk_!ITb+66P8BMt)gszoiW9I>EaGgE&!?wnY6($ z5LYU7*hGDJ-#`K9fun}O^2LQ+6 z*gE|tX@`Esi980I+x| z((Qm1pRMsTaafNW=G0~rU~sFMgVD(1{tPLYQ;!4(%T#8v%WUJ2j8d>$yktbSM;v<` 
z58`nNGOUAz@e<KvsjSf{h;rai}=r4xj^qlU(aA`O|-rVuL!;1GfZI2IF7 zO9lZl5FyI)7#z}O%k7Cuhy(ly8IzRn)hFzx3%7OCJe(80M98@vC?kgR)!FX=c5bhb zNnoQ(;p3ALV8g*kEDYc+2rhJ;c2>4#^Rw)MJ8Rn z+XN&8>KqI+BSrXNx^35X7;?(9_ndmv`K;H#Cj1+jLh=)<>X&V)&PS?h9{ zA(56G)1G`&8nl7^BLG`6(Y1X+f`1@S+DY%*w@NVjMLutlS22+Kf68lEp{FH-{8(Ft zVAG@G(mb+NfcuplvAo-r3=+}cjoiSN#ICen)RGC$A^?D>;PN3z5`-~ZX3>635v9@P)5YuTNT zk~rDZbxsKlvd-=NuFu<28 zpowy#oJMBL7znX@@x}tN>BdUMW)Q8EqewsS`RP(zK&J655Py~cu4Q#pA0+|WH5;q3 zQ|H2t9bDWqwOTU;*XF~Vr-bSOf}Q**80G2qx7G$;C8=)*}u zfQH}ky#xyG5+L6tab$#kRXFY0!pF0TcPqV-dlLPWMRa=7`boLmJm1S)E@4aOPG0M&KwuxtMY9Yxx* zi_D1V8*f4@E$FOgRC?HBV>y84?NIAYYX#N(uuB{-`xMMu38XkU`1D%r)=yM7J?{i# z)kaLQSh@ou2smDx02$K?7VQCcP`Be9qjS%ghXf{PK8dIW?mvf=qzj9lhkG&av?8eGyoFJ}OC1W($cRXC!?X~hf7IDszq z7PZkjC+z!Jmm`AcI=E{tk|qw{2`crFb#goHd>VE(YFX?csgfMOyZXu`Ky5;{5E9&M zN+|#YXm#oNhL%;Kw7$VtAKzobPYXp3k+$|B_S;+<&a7`#PD6?-p2bp``yR! zeZRgx989fsuY28bUFUV4=c3GVU*Cu;tqq?k8}v%ck44tq#bqS#9lqoA*<6i@W)xp{ zBJFK?rHU&d&LZWJ0!P9ZYu9qMckS`0Z4j<9SORR+JC%m27E$Nx0n{bqPG{mmvrVQ- zLhoK0$?==!^z-uhH#5D(BC5^mUQ0ap%p7o9i!d31~0P5Hh=~>nmc}E025JHyc&Aoj!=FP@T0k^W5E6 zh7erq(^RRFZr$*F{pX=}4r#zo6v$WQc(kD5r6Y>1pKI2Kc&$CV4j|=puYiKCv&0p` zlv&^gE$8x1`ITayZ|S>t8-#Pa3ZI)@g{7v|b)w^gQLUa+(!e@TXSLVYCX46R1qa=% z`aUEsRgYAizuQ&s`TAX2>RxHzJcl-CI3w!~^w5&GKhP>K^v@iz8%b(e2^|A*aJJXZ zW{1NeSB_J*vcnr?nCY7`YOAa*jg29- z2`o{OSbknKUS7ABJAsdWe5U&n-CV&)RE+tM*TcfCDJZyJ zMixj5H9KhGKM3a~&!3;f=P~act`v=k6aX!8Lci z-`5o%1M;Rq-MNCa$80fa3~a8`Wq^ioOZ};+yL7pmE>)+%P$7hBV!mK+fWm&-1U%aN zZZcgO-ED&hx$Hxcko|KG1yc?yrUb_JudIN|(5!6=AbqK=t&k0Yt<=xU>&4zlO0Doc z^Px4#^#K#co~}Cd&HWdZ6;a2IajIFYiRRMa3pWd(2&x}*|K&S!o;<>zeLaP1%Tnz` z@~ph#6ng0?>`qr2h%9%~o-TpS%POhICH5WS67ba78grzUC@=jSS-!9>jXm+UP%eJ0 zn{>oGsC1scA%jn0+ctNkn%?{bZ%50Mq>%|=h8i3FBvi-C=v zhLBhCN!u6AnL9z2ec4(T!)EbG5amd)_jlL0Z)oOd=SeoUdKSm*qhX%633pc%D+qdb z+JAI^S#YHGt@SkjU~Z}EhdteT`>T3%J>r~}2eD3bsM2o=uQV_Ccb{mpJcMat@!-=f zz0X^;efzjs_q%4)sMhlxsyowD4RQqk&i_C7a|Co|Wsj0hDHfN5)qh)TS zHWxCGQm`o_2p3npZRK6aWH+8Tf0%ObUTk@l^ym47f@UNEv0@Z{mWD-Ia?T97q2DcK z-Su^|y*a3H|ISNE$b^F$C#$Y+geN#ZgAlM?R4kY%I21ZC>E?ZMl_}Q30Q#Ic_4KjT zU6kZ_ILXU&D?1Ld6pBe*?kRomT79i{g_USjvp6rSQJgkUdyt5ics z?LEETy^jao*bJ)W!})}dt7RNr`^MRU))gvz56fmehow;A^MhvvEH1}@Hu_BZUSmrr zV`vb??VE$0Ui4Gv)@2b9ay#kAX`6I>HscCTHlFf9WRa^#ESyhgQAkf%hiz2{%Wi*b zgLHH1MEY2;hCt1#`jB1m>EU{m@BX(H%dk;uJv$*vt$ZCT8_IuZd*k)yf_i4$QKq9` z9XUL7hj3nA`3K#d zvE7}dApqa(v9}UKrrtzUBnVVa(v|C-t?UmMXLsW6VH@-87V#XADnFTH^i2!<_G!03 z-^I~0LpWqqb-$r|o*T&47WxuhcV1OH7zrfZ3FF7E-Vjj0lJ-nE#PubrGO1~vxf(=q zvO{(jYvcM!l4kwn(Lz>(09@3=?tH0R{l!MdHqfg;iL7m^MEsyx?eVod+GSgpwbCIW zaDEQ^v@VQ*Yk%!UnNhvm!kfI+x(`=!lR%M^g=~QoBHW4) z)No{=^lil1P@$N6g{{ygK#5VNYN&>tU7`E|rIL=Jt{dd2@iJ^vAqI>L2tre&R|y2# z8}%k_3XGiZcK18?jz2^CtNv=xGCii5;ljiLuE_HHk~-HA?9A8Hf6>wr=v;EgD0*;L z2)5r}8#G~THtxc>r#2%49eLuBe&C{CKjT47mot%qjFiFAFA)n&PNEeG9~4k8FS9>L zt@6CTuQYxRekxqH2x9<@?G;nl>C~65aE$AOAdoDwS-$fTK>- zS{N!TS)jM5Vpc@#K*8_+dW20wKFFHo8_kBZu7xlwY*-~&i@x~I+|i8Qvr%_U`<8zG zH7MzSyp{ZlOlhZY!mTo|acar$5+`<>M>BMSR~_M%yzkH-Y1Cqqv?2hC2DI&EFsOmb z>MY+D+vG^Ek%WNVNY?;PFa*&}mX^ciI>2u-c1#G#8w|Nri^bFH^5S^rDvKTt^Z9y@ z5Ya|tUDrfohz@7`eZQN>a4R9++o@r<Dh?BVQ zC2!PIXsVeyt^QR*IC1sBMT`yHcfKC`jz?KA>GTH>30D8hbEDc(oee}rV{`e>EQMtreqlC&jY03EPN zz30Y-DJ^p(2}vU%eNOm#Wqr)sn^S&L>|)3x5v%tY!x}ekMpNlwHa@qsK6zpS1`Z@? 
zf1JM?>ii_l0U}L?qngj{!|n9lHWm{irFp+|6nKn$w#HfnUR!pMtd++q)-A6X<`NlA z%X9n0n&j%YFCA8E-WwuksSlO(i7WNwN55Yl*2N5k&q0)pD(DAwtcOaAbRrxL z8zd`XWs#bRzi5{tPQ)?h6pFq8ASGsiX&zr$1}~2>F2-|)UZy^qk$igMChczq({;F) z-->;2Pm3zFXPNg%K9L6}iaq8YnS>~s6(EUN=C#VlkdaOkYv7cHVpry2Gz}n;g%3$H z2t%1{Xk)8$AqYuQA+;(p%6S~riFKgFky?|TwgT{p>WTe#Uiy(|19Hw@L#^_ce+Yt` zykeIx{lZLup3mlqaxPqiI{F5Y)4;9x+#%wb51z;(xNzmwqe)i8PCH4UL90rAuhG^k z3e3PCZD-Ufd>kn`49o_@9eK+<@b>On$QkyEGqTLPYzEPNSsIOXv1WfQaCB$tcWjLV zig|>gG}vOSs+19^<-J0UX#;KMOHV*9;=P`kC2QqqcZd^Y(Iez=t22PTg^s;ooedJF z34Q*x@)5;nbHb#(MrU4{vmZD*_h$wjU&S4=0QN6n7lNT?5`PdDWmA^so);Ty=!Ro= z0uiX2${5zkY7K_|gZD+mteA{4dWL5G0>P_9BwE0nWYN2|(y`*jJ?O@Jm+lFe`x-+9 zb}!FcEZUx*ET2z|)P7|LpcyxzHz&H8sp(s8zyN4?^g7+_*f`-7?_mkn&~Ts9tBexP zxjmbvFdjRKs2KP)6(i3g6*C=XI%WY0xWDgu0hsX&tyk#C2S(dXG-Ms))y7;2q|b6) znNCkgfhpX9bz7U|#?j4#`;MlbZH?2AS1wGtS^9B8GPbp@+d*|FZCRN7yAml)_BVxc#IFO0Sr@XV{z0${Lci)C-9^rGHi80;qh{_wqc5* zc-K?{2l3@=<31qdA@fBh`80j@cPF2}huH(X4f3OOQO9}Ia!@W0-93 zU4qz5p;dprCAi~=u|&0!0JT?3?lE=y6s`Ffq*yuEQ?E{WoZJs4U&Z7V-tx|P4k?%3 zv9zfB#Vgl#l)t}xOC-gF89Fo>q4&_F?Y(*9<|(+$1KeEv!fNOnqUU`%P1s2(87MYA z1Hz_|;T+sJLQZrKpk~=NfTF~!3YxVXHN%p$;%`wyf4(_D<~jDx%)vNN!j2KHDPKN^ zN3M4)n`iNj*^BRE{!{h=#(>lHdvRO}^BVbZDtkW0eBJ;=k6dCzGwa8r+ID!x~rK8HZ3hO~Tk%82pDfFB=xEE+F)@+_(d*rh>Kj}=>a&N)mg zvCFKpiHa#vu!bhpL-g1SVH)Orfl-p6Z*w>&FYa`1Zx3J{>BQPgQ06E=8+`yVX1@SS zm!Pep!z5(cI_MiTm$P+H@lopaBfDPPUmGnU6Ya=yC8)K~jq5+60CwcNiGf)9b89Z3 z=OdzQ9aC?1b4LnUWYuwi2QJ=mM z*wJ)y$)T60DDTTbsifi9E)ev#WT>XUuY|=^xrQ~m3x%(#2VKs-Vg{sHohT+ftj>WX zqs4H*1kTAS4Uf}dmgg+LmvhRzcg&J&YI6FykAi(`WPF60&xvI^V~6YkV9FJRhp_?= zCiH;ZKTKqGn-_L3TDb(;`@7NJn-#O7wI1bR?9dC56tO+T&7JbdjWWxKu=hN1qPG0H9j1~fai>`f+$13m zg?A|Aao^b^uz7B_NIn&t_I0FFEOT21>@Klb#aM+B!&ao(tkFzfaY{f;j5FvuEWu|u zkUv#$6sQ_v++cKB{}veuv-~xLaa8d!IJ5P;`?n7h8FdI_2C`!4xJ2bA3sYx-jyu!+ zap-%;egE?bRuEB}>)kn&@az7E;xj>taE2&0T<=5BgT|`O#6LYU{->nf6GSq#0 z6t3@^Ow0MqZ!b^xq!rMe)T-u-74==&n0~bQpALj(^(6vD5X{=B{9$%P?Pje<JIJjF=(<7%GsR;^x#(2JyRVJy%q zMmj25?+I+6>P?3 zak!R8IMf{AHL|6yR>V(UQ|IcdoRqZdHL}gWdFjW%E~fgj{gE@(W0678wII5Nt;T&a zoCTSTat)XuL_zJQaz^7r`dKU6g|=Fhsux?I`t;MyIKyE4H$@&i%0>F`=BykoBQ$(? 
z9qEYIYB~>vEu2sr{%fYm`Y0|}eECI@%TiHZtc>gyoznJ7y{qkB_SSf%>2LY~ zd$5Wn@gzx3nQ-<;DjK%cikZu^+zLLl738*ayU|=XsdG);+k2CMCJ;2;h57^rukP$* z)tWklEqLq_G&6a=Ziyx>sR#d^(?Wdn#NPY!7$=J$!E0hkwCcmRIh1!y-PpIS_I!cs z)4X4oM7?W5ich|MRYx;c=dnBFbXrfUq|RITt-$>G)DFd*Fu#mKs<6P0$RhLtUjJa7 z+V-WvH4cnnjMKV6xMz@eB6Lc^C*M_7kyL$~yO_FnwhdD*iJN*cYyG>PZ9+fBe5kU| zUnf1Yy>rLv!Hla{!-{a#)o61w*(6EtalBlw_^tjd`yIleK%;^F#8!`76U=8L1joNE zAn05&$MLlBIiLrfJ4N|F*TnDR?8hI37!^tx!Op_Qc#z|+_++S@pSHO^L!@?t|7wGJ zLQZ_=nXCA35$=h_5uVAzuiF%+PWKySJ4CFwzB!e9)>LmmbJ6uc)ndi_$1xEtSC+5; z*GD$*f2v$W*lC?RwX(N(nq9Iwv^r)EKQ1f|wZ0AGhI<}6q04edn}?zK+W*x@!g-yX zs)W@8Hp!n@vTd|3AM*TLs%K(5`YI)euJ?>L5-&HU$ii0`o} zLz}x5P}~3X;ot8ehzen2PIlRUxpw(jXmeTw7ydU0>OZ^FcK{HfH!2E#|JahhP6oRG zBn`aRmw)P=|Dj<;R3I}*K#~0tR{U2l{67yM$Z$J=3LpZxfQZMp`)lCdm4(Wj*8jcD zYFD5`vN46tKh>(xmB=Fxren-|p=$rxbPjyBw87!kzPnidf7~Io5{yR_QuaRMujBQd zDzuxpT4q0`um*u{HbfsPP@^=o>^E@93vOdL6$Qnv_*gNAjBxupubtlUm;QnGBbViu zdsJndW;>uhuLael@3mmSE!4sShpSynAl!)1h3F+)uRMPUC|7;Jq0A#f|6wZ*biduk z7FoFcVkpx7pW_2nh$*=wNS)~)gu-rLhVqHN7uI_28uKg5v>uR6%mTSjNpTBg2HIL=J9ra{sA69L+=5eR5^6wf|3;Q z(k+Iuj<}Q;qt1FWZUvRof6Ra6n3UGSqDtw~Xu zIcM>i)vu~fDs}I%=bZvuv-_9VKNhun0|u5QwLWmByX+v7c&~#YQ*MA`d{Gz4;m)td z7B9@UBq66AHJRX&5G^|0?l?d2u3ZnX0TZ!S@JslZ!8n<|m%YQ)&~vRXXp5XF&N_h_ z^I#e6PnVUGkL6i*1-xcfthS3>Y!{3}|G;fyhebMZ)9rB#xQix2_s0>PUyBO?8M~$; zx4%Aqg2*((A9t|9uMTG#9Q+H6`)lTRkscubZCDFYR>QBjUDd2)JVx%sS*uTzU~IBJ z4RUNDzaYvFAo&ci=h=xWVFUG zEZLVg?^K4{hGpZH>UO2m8}Ct$EUosasxkpd(sx)%S2l_8coT%xs)};6e!ZMoIfV0R z`HflIq;4DdnJuo%abi9=9{)0G1s#y7tz<6}Q!; zvqKr^rU>RAl|BM0U)M?G`P?Mtc{p3G+L}& z25r0^ULVk$laM(1Hdl0OQ)a;@fK0rdfIT6piq`B{pm|7dyi`a1+vK4=2Wxu}8IiX1 z_m-JVO7KzW3SN$^bgh^LO|IS15o>_P>}ZS%Z^E-Y2#f%9h#YGUR}t0uzGfxUJX%BxG>yThZ6gwfByy{a+{K9m#w z;tFb$Hu`C3#PrZq4|PWL=)xEuPDo(b)Lj8BJZ!pO^TNG>@p*pP2<<%G$_w_^vM+lt z&6gN34F9&m=)ktI7AYn6GtW9rb@U8+8CLyDnky_|z}y>WTZ=o}s<(bSk7Qn(a`{3X znMs?RkywWf{d!%yKRR-?;n@idNaG#yWpz1B zW9<)N2KczZ=68}UZ-4wo^+y5zY2UKl5m}N5kCu)kn^UdMq7L2UT16aRh1L|A7h7L@ zxEJfmHhIx~Dak}X-s}#OpUs{Z$dgW+J4gAjshDy9)_bm1a)o{CBMPS959AaeoK z_-yX?EAk%MJt~40GHEy{%EY)T(SanTcMjZJ=S%izQrVl*%QOxK$G4vF{k`k>_ScrX zUUvqmF$0Y;crG*Z?tqCNYLBND0|SC~@rvD_$Li|l*!eix*koy|6tmG^&9`aRp2gr% zU!VAKNZe(c!DJyx6@ZuMxMG_>OhCOC1S>6Mnqr#>+L#jyp-QKd{9Fv0UU)j%W1;=) z?oP4MU}gPS^FlF)Q(*iLoX zM+eJY!g6(M#_S%&pg@nNJYovE@oT)1ZoX+;3pe_7Uer8m!$Zm=#Rgt4htu89SH;uw z>wiwOsFLT6L`Q1jrF+Zw@;JNbkehRakg?qrgdI~=k!e_NI$i0*gNbLkSu-cpo)_Jj z_eqOa#p<=5pR!8gR(|mdFl1Fzq*zrxcjUL+Aej`{AnH)svwt!7IA#|1QN>{$8lHkx ze^v7uo8%NQ%ocTF8RZM_HhFll)vb-HFIjHC2t18I#B2M3dBP(08t(B{NA)6B9z0T2 zI;!#4Xh!l@U*9N+{;J{9pAHWxpm$-}LFkLvj11g-KPtp6)$ypRJ(1`)nWX|A*ya(; zclB}O{xxDRTt^oo!?vr}hpalvB-eU15=(dbmj*Emz&OsF?Jro$eb1+dqISl+Q0fyp zy$Pv~ffO!rsK?AzSNrk}^k65tio_GK;8|cweN7a__ue#5m`Ytm3ORJvo=o2>E@^GaI(13u~xKayExvVOfiao7HF`dIlbU3HS5J9 z3B>7HNGraFsN$bz7sCr`;`Rvt8gEdhqSg3``v5Q<=QDbVJvZ)j6*)44zYaFVh_1_p3`B?rLsK6%Oxlx1NP3 zjy{SheRa;2$mdSjQNV|6Q{!0Ugb6BY{klSnA-xt3&z8CCtYR`ebTLg!DW1Vl464iw zwK}IFj`$3;^M6>8qm1bHmHg_Wm!xkgFikORnA0>l?WWAZEpk@EIMEF$8cj1Wvs9H7 zT(}l2NMpE=kDfeLaFgZy{fhW&b#_a*?z_`%G1IH$-Yt7JrniNtFL$xgG>HAZ2Hv!ez5{kS?1;($_M+{Pvb?IAuBTK;$OtV9sucPaome|Ic!@AC*{#t zhpnosyl*~8?_c@V3lswo+uS=e{bLa(AtNOZIHvYBsZH`^gsSi;7@sc6X;_dSxj+m= z7rmUv+0LR*%N~RWQ+;_WT@Y^qiRSIhX4e|YgQB?By=J3)VVs95gFzqCFnVph{pi^y zSgjv?xurTJ>z@gMwQHBH>TQx^?kq8)uBc99rEZ~?csZ9Ctso6kB@I&g>FxEIRa}o; z2$ef#Ne1EKJCl)DRbqcZF|<>sy*rv_;TKT!W^y!MM_Tx1)5}US&9sO(%0h)9tYQ(W*VDa)6K%{Qup+xo@ccbeuBSNnXF#ol z`LF3h!B7Fj(1;*|+YQ&AOF2=I5VKP!(9~r^2y+us-XX>#FVj+39*UdTZNoDLoE!As z?QM)x?#MjJa(-~8EAPB!XQI@o9Zwq}Y#q2i5kU^8dy36kQHuczxW|wbsnOsY`jz!ltjfiYWFoM?soIATh&)zP{Q#)9yXG$u6JKyGciM;& 
zR5$gY*m+S~)luslo&?A|2J$0brafPSc-3>X`8F=b2yPAYF049=s$_#=RY9h@vub{o z<%IJxd1qkK#aJ9{``TbPpL6qUb7qiTF(?Y$JJR*8a4~h=Z%8f@ZHnd;dLM{NmFkcW ztvPAFH9X<3?}eWtCg7#v+Cux@)ov~3Or8{?e!e}>14_a^SetAMTC5EBYxa&2)rvrmz5}#&)UuA1=*(PbzPjB_h>mFc6ByyaaIYJ zUoF|s*ShR(t?DHsA;nEKx)wV@_qfK>@VBu^d!Jm%^wBTI6A9TisZg+At=++$=lP*R z*8Md8!Zw|bxuc)n)Fm;7T*`MDD7>9KGXT!Om&)-ysPc~si;dg#cz&0~5wT3GP(ht} z1pLgxYX@=dU{RBrrsM2hshHM8swcOTfiSXI_N}r1=_z~@G9L6Ti%w78 z0xj*22f1}lNKMhf&Qi7K57XDw`6u>2GEg1Aa_6Slownnuh9y(7`<11*XA!C@jWoy4 z)jN1(jP0VW1P#32nk`ixr~j3j(S9pH*bPm*4*Utog&y~XO3S&0ml!gZv^buav@Q1C zy(&8#uGzL@wcNPirp4*TI-?9T+y{A5PJ-v;NcW-Wn8>p?=`PkjI=IO1k7+IY)^o14 zyC&-3L1~X~UwXk$&?YQsO#>?%W)+D>K+q;vaHV{oJOr5+7Dj+tV}tf@_TS0UD^GI@ zG_?&8vL)qt1W6gkfg6Irv=52V*HKm`J{Dz0H?1`bjU+wgtYdq9q+Qw>@ivXmB(^!6DmTO-tiMMfRpU6wESFHtct(9c0Yf7_vgv%l>Rvj50H} zQmx#lsT3+=f|!6qe*w*Z(O*%19>AS=j8+TET> z$36@Y4{=<`Xsoo$zbR9#LnHYGamBzpo`HAtao&xhOSo}(254Pv&svn@YpnnzHnZoY zBKz7pq+S}`ZzH$mqYR)f{v)@e&wz}% zsGwh;1J(ym_X>Nc&IYazKaa$wi3S#ff{Q>V2$WB!H##Bj@;B4}JShBJ(+|$2z{Q>q z74{zk!k34>Yr^j0XsVVV$<3eH6nyF{M0|qSzDM=!zfOV5tcSTD!R#zp5SRn;Og0c; z3>)ofNgq*c1HDthYdOZUKR<8s@lQt}Vs-Jm;YwWLzMm&Ww~#|>moDx+j{#5RvTAl` zpgfDfx(#}Ug6f`e1eyKe&~YQs%=Ci9Fsb#n#n$@gcXGfD<+qml9d*GF0p>o#y-mMN zVGyzPZ=N2}f-%?geg>NYWN4ydq>{OTKrzW)2pDG>fB-X2xx47^l~z#v5&F*ltT_Pt zp^pfyV!t&@{_HtKXg!F8lZVP}3qF9QE4uyp`vth0v+!Gu1PPbmQW=R;w2u+62=~EG zo=oI9=-7i}XgEN0;aEtFR>TwifAlwWf_yRD2DT`E*Iy4A`BjAd`#;|sfT+BAC4c(2 z8~p1&{Pka`DG{pXH~9HzrGNTv5j=11>7(bDe>?wwo{Y5_E;&peI|;(Y|MhFehoRO% zb>``RHsa^-1A;{u!0T+4j?E{2cHQq|_OI5x0RvWc`m1#R2Gg?vypFWTfwts-Z4T{| zD}Vr;4Px{A?^FD2B9b0v$z(UX?LT$Q-|x{9=iiOv$e)I=|Jge*fWyv0o14=WKm6~_ zL9o&jYaQppDE{Tz*=|Cc!xW>ce@lRV>%Z?;f*?Lc%=k9j-ZK1P=~@gy4|iG!R^Z6N0 z?|Z**=Ev03)YSaARCRTy`<%VcS$plZ*7L0ABuG|T3=NqO83qOhO+s8)9tH*;0s{m0 z6!8i8=G{;Fei#^3elsB3%57#Q)OXcYuig&w>VjmYruVZm4|ckC=| zfp>`Xz9(eUh!VcC`l#@@9%_oVyZW%t>u55jVaw7!CySKn>CnA;wXOfNq(K5!>pXgY z;CvwY{?1{I-()bk_WUW#7Im$@-OB=4fu=lJEMmO&XUPejBug<|)l**6ybkQCIuWhqYO%2f9$YX z(mTa*IFPMq+)~1|!E%tvrrMrbw3i9@m$sa!BWk-kh7ZQkpEHciZxE3kmU1CJX z%+AK!=RvI6c+HgUhj4PL;ey1pz2E#hX?G=$QJ$=aSPkOYXGC8J=sD}xknyXcgwci( z4?eCZ!zhb<;spOVGxN!MD?WVStF^CAD8dp*?~>7fwWhgts3wsOhA7Rhu6<}=BCvT{ z=J8hUgFRh}CL(^8An_0mekT>pul5j!8qBg_*8?LG5$KY2!?yldywY~f z-9UP*Ndl`T0P`ZcfB7?xTNt7?wgB@5HW}=zXN33^`?$Vj-9AM6xGG}dulMOA!@3{K zkPae5kf1M0{>c;;T8#XO-Hmv^l8i!&-kFF{SaiR1{Bg(S)8qJ8l;lJw-Lk}$6rV6Y z2CcNqW)N9_zFuu|#c6obx)sb~xBt`4vznTRvJ(3xrZol^QZ+T=`aADndz=mbtMpz? 
z!<#sic|vpd<+nA&ih;c~4AzJ*>|8bi0}up;UWOaduErPz97dfj86UKb$<9V3_qm4{})ItQ* zA|#6z^|Dn6Pi0+7x5`vnwW}LXt*qDuy3a=$<(MA0b;9+R>x{uof3*2<;WeuFnG78*V#LqCyB$!zr z4wioN&H=Oali3zI>2YieuI;1wfWlmq1{kkp zqKhvih)C(Aj_okaB1ISo?YMemcjAax0qx}Z;#`pdpD|uTh~fjkkq zD@}?Y?)>}B5ys|QZSquE)a;DeJZAIIuLAL}P(p>VN9;`TxRB^Y*E1!Dt4wk0o)-(r zq{j|FH>JLU+wFqM3EtE+AAhqgnCq(GTglV2f+rx(% z+K{tHGDg*Rr@l_Wc^*tm84)QIFQg`!pPiIGpV=U%q{OCjsvN3-TQI1sRahr;+OQ=w z5vE9Az#A<^BpEFqE9)XFBX6UyG;GRhp-w^{sw}P{SeTQX!<*(Mn_0-2Z#RBnUNKHH zzBOJ_(5OgVsF7bW-c^WS7+XlJ*j%(&_;&L9q^^Z6o3Xi~>5Gw#;f<**i#pRz(=`i* z$@y%zw-p*8Sw_XU868szrBzDXm6HZsqSa{?n><%MS8uKe++*L!&50&vCTD8pG>X+r z)ys%Uc4<+onFLD*4B912YrUURkIhWXtyetv-IH37aqo1e+(g(y zIgva)-&z{JH*8@-WbS5DwU)K9UBY0mWdFjFWLYzE@I`N*X;-hQ&9&`aAkj1DE=0C+ z_F(n|6Wm2^-kgm;sX*T27bg<#e2P}W|o1ZS~N}FG(GP4U>0Fk)|)YtF*G`E|Lu=sA7LN2 zvLS7b(vJzkLI>sjVyhyBB7D^;4VSr%Ia|wMD|5@enNiC}mee!KGuqYUv&_5pyKkno z%EWegcPe&icNn?To~Aq%3`Bdn+lJKE7#Q+N1#RCpi>v%a$P0@X#9W(PBAf$G1upKK zt@gsRpZ|~#hF+}gtR$^B3|y9k#7O%0X~v4iPL-UM@Ju03ZCkTe%T}vdqgj{E6U{%~ zd%j@fHq0yPMt5L(cCoej=k;OH>CR#K$-DD{lPz6y7K8YZ`1MAl#kY$U4PkCRCx<7m zPdJy4V@oq_O7pGqLHMKc(AalO7vqYea*MgRZ?22QG*^NH` z^5Tn8SFhf#aU-hz(@1!tr|v^%dPBOYsGT3$K1hC?6RZ@B6r2p?L1aT8KrTkwML9-H z#B+uuVZ0=0;%WG3(7$3=!0Cmljmbi$PL>~m6tPWCEHWb|kwGYyo=GNJ{KAQGmPQ>% zi};Z~2Q#g~Ue|29bTBWW*q4yd!p|Or`&j(Ecbag#dV;Mb{w_gpgCw!{sb;7oq7tGe zqPnP5sGDVwq}3uuIo;bg2r?P733HGYo{x%~^_b0#U3K|?QRB_ADC(MyEMxDQ`H~3P zYjtfqbXyIRkxgw$wq;5VPl;@zy+vXFq|vcEv}~wod~!J5z}A3&BXCFK9o8cAUGRWz zjL|{F*d<11vbQK@$9LywN1IfmM*D(>n#V+8@OnOOo~ZVb+iN$EW2K|9n4#W8OqzEsXHKEdB?BfHNC&Oncfq9#tt z;1prab}G0X7g0#X4aB>DgO=KvILWhMd6l$@Zt~sOGCrukKW>JnPk-`8O{=2Kct1%f zK8kz&ymrt*rQeI2(MWsMbM>-fo&SzB4UN~$ zC;IUYYHl;J9St4$K2DcTyQPmLA+eS8r$fHRT$99rnu6ZYwh@ zN0+$QySHwk{Mq}HI4L_RKIK>vsy_F9(0-xZNoUfev{?>Qv8;MD&tC4&HQEiD3i}i5 zq6Nc-e9N)if!2ZQAO2SPh2x6T(I?+IV zD=1kssUsj}>$fO!1TX5~W+)Y1DMG<3wFiz?mH7HJP zjksU9ge{?0yZxBZI;)$v?YMqtaNT`|t@5ZOr4G+?cgJb)RFXZzrgG_hom-)SrPw$>C1y&VsKkL+`{y;y!GciGdx1!50wUhXh{RPX7lS1Th6T@77~DL6cr zqipxDcg-$Y#!My_N*21@1wBf5h>sh#11>t&7bE%E?u$Ilu8)Rw{Cuj;Lie`aDc#r( z+z+3BWanP2Yqr`8+x0nMxL;U5&n8CY+qvn!aotns%{57-_969ByPnvySgr21Ie&75 z@T6T6N#%~#_h+!NG2DSW0!*bM%*KJIC_yI2eGf_oM@28@g^a45=r?>B81t2T4^76K z`b`+!r8llOAFj3xU_WQWhHs$BvR9{bxhhAc(i#OI4#6fGG#mdm@%2%_F&QNObyxcs zqU3VmaLssE?fGe4@E06F6v0NS62?+eFf`yZA`Cn%A!PLf4ge}e;e1T*uuI2y(gGUK{!Ai(e9Dwns%oJ4}Ri)l@8Q54c=o#AR z8!@89166m;k`Zi9Eykun1iT?ik>pG2G z&HkCm+TmZf1#XZL`U@j70~6!lV}nz9px<)InzOW^PGqJP&=c)hk)BiqI$-&58$i@oH>B#rbdj0F<|NQY^C-N{tZ~Z?^@t2(+ zz6FNnL*`-pd(rrirFh7z!8*P$6P8f~?@-SKf3Q$5diB>k^s{MG;6BO>42%Gbgz#HM zSJ<6ogqr#G#_BFsh4&k}@0oLi1Owq2FdiY{qoxPgOTfjxqu>)@u#&H7C4(RcNuob?{c_4_AdCGwW^ ze~kxz7U*6k_`jNo6&?ffZcGse=bzdCz7T)Zl>d2M|DEPxrPBUuwEueA|M&Ywzyj;y zsE={rWOK|Q;Cdv?+i)$(VK9yue}EDL-tTfFGwjRP!2w7go$=4d_>QqkVNq04kn?$m zFamz>hRfekGW}#Bbox(FpXvTsbhoHDXhl&8?UVXDjtq;#ArgdUNzOvU&}>}GbDIXe zr#uG&$6#RP@uP0?FxK2uZ}$NHmjk4>CnI8sg(Y<~IFLwA@9W)a(GQ|A*IR|9{mVZ; z4m`%!499JM;eD~9cdaZ*%>rTnJHQFBBas+7Vv~$OL)5qEmN;gE{=+U}o6gYZ_;$>nU^ZQ+R5J!$$dUR?v;7zw70* ziFS*3jN%McFL}?=RnObUyD4Eq&X#<99C|2Y4LhY4Ie_&B(gR;6oU9T49Uoc0!3c^9 z9Y%71(Cjx!`MnjHHjaJClZ9qEgt5s~S;MMoqi!0jy~lv4XRa){trJ!3l=pQaM^PD?21a(^ zY0%+GD~Pf>QEc8_H{Z```rMu}?+@`^_XCfmUh8`NcP=x9&8I`P03kX~CI@eM)$-dvK z-%p^7)f}a3zByVUB$&N>QyOk}`6o6o2G<*w?_&Am0m0#|Bg32CHW#cn&ND{%OCb*b z>?#g8=jBfyp*h~~DT%Jn_>;3PV_p0+)E$lN;iXm-#uq!lHGekW-|j2Q2fnZur)yMF z(PEo6FD%yg_D%sV<*Hk4B_-8&?s<2){-~Z?)HuE?jF{N-c!1qcD-9w`aLeOZQa5WQ z{)pE8qNj5(S0?qPDoSKo|k2NIQy2s`EuC*9QEk&4XOqic%_W(`>Ik1io~C zMVqhaL#qxyXIUXNQK>{tE3M>T_Ca1GLF|Q{Fx@=3Z*9!Z^JlE4vUCkvv>nX@od2xg 
literal 0
HcmV?d00001

diff --git a/docs/source/assets/kernel/value.png b/docs/source/assets/kernel/value.png
new file mode 100644
index 0000000000000000000000000000000000000000..f585c77b2e1449825a3c704cce6b102f567696a8
GIT binary patch
literal 121414
zpfQkrjmMy;yBHs{OR#U>?ykgq_lsYp=oV5)lMS^!cgdgn5|`aRML=`3J0 zlntoLPw84y-Yh@Ga;m?$Mtk`nPtheJBD`LXqdx3U^{Ib(8W0tJ(JXN32I|J+I985g zR+5YAqZ1?=NgHWnwp|W@jT;nqp6W~$-;zvIJ-0q7(dF5>-kwsu?%vgQl{4vZ-c7tT z!1{GSv+x$*i*Y>X)>AoxpAX6sq0o+an;Y-F<_kJjX8Sro9k%zy0Up+U`-xEd@CZ1U zYQOk)Er8O41&DQ@hj!sbnOidNC<6B3v+fIgLyT?|?BwAHgBPC&UP=IxPys+L7{ zB)5-=OMD2J(UTyGlz{lI0I=vT1c^?$_%}j%nFQ=w)4CdUg_K&78%}KAc)u{>!B9p^ za{T%s+zsF38fU&7I$E9C9gbSU-Q^iJ{O;?;ra*6V2BM0Enb|MUUo#hOKYKz_%lWPW z$ADq;C2QUrpFy=7lkzsB{~+!Li>Xj*9g3koKfh>;66-~oGEMzE%$QW(`{+4h3TGk@ z3Y6MvK=3KtjZ-J?yKSAG6gkee_uy>V0?>&p7Wmq#hjANo0@cnoF0)Stdw@9`_bt?{@UEzh0Jn1oD3q$tR8dIa z?i_`lK|UPTDfH<(JXM2(GT0-mQxr?A0qD8?dpl$hk>-?qLT*rwxh-}`Mwm?gwM>$Pu1js2*bY5xayKU{Z2%Wa`fW{U8 z9{xLnwiP4A41}>qWUwVN@XGh!Alnaq7{k1D=3WB_=MpHPBp?XFsaOL$1=4!KunbqhYuH)q4%6k=-VxGFoECiYnTO4*5pl# z5(TOe-OX)e&9&Hd13Hwqz~z;=u{@mwIx`DFfs+^2R&Amv_|3XbUndKE1!lmjtVubu zOX&d}$_^Mb*wq0BGYJ8Hn)A((X80r0wkjqjCjGL%q-b?ft{(4UVMf%3+a%?ZJIaUb z7`9~)79+rPlj7+ZxyB$HOSh5>BzT_BA!2%yGHa|9kjP;J=KTZB#D)1?+VoA)>C0>JB5(u!RRyHmHmdNH=bd#!%)iJmCyL zi``7Bi=x>D%$zyE?aUYJcXz73?tWm|N#B6My@em3Po-PkVfic(QuViGJqq^=fh?I> zc+9NB;Wdp;d?T4SkMk;^yw(!|PtE4+ckTju1Ww_QO7b@!A_wx7^l5wVl^(eyNa4)g zRsbX1iy-n6mZK^0K63?-_1S$a;3bzSE0Z9Z4=1U7rXn}agaY?)fZmZxTV~nST4<3+ zK~lY9%+GrnLE22Y`>(r~#K(hb!COWE86|wXQm&lHuPwU~eY2=`Ez~PrYN_GC6kvC+ zd@HR4uyDK7)`F&M@&O8>KsYI@)!z{9F=UP9aW9*r=F@8{5S3AN_ z+-=aD)_J)@SVv_)I<7h~^N}$|`<(~{(SDcs1=8mpUvCIVNHDr~#{)B(TO*@$RsHqN zt*s6a{&wa$WjSy)iq(_i`|z}sPO*ws)%a}^iJEKRjRa*I$V@^DrN!wy1*YDi&;|IQ zEX(9;;@XwF-W8fL`C`Fg_Ysqqj&QxtsjfEudc$(l32nTqkNNb}OK+$9s__`Xuy#^; zlUEe52dBS<%wM84(&}s^#J|9P&D;Irn4b8qu2niEIX(s+`O^!%u?O6G@*XXEeytZ@ zz&pb!uAgnV!|%V*)(h{cE>xn`p0@Dx^z{RH>6ED8meJ#gu$qf+;p#JzxRa2$K@@RGFhW{Ix5OJOSN|;j)w7S}5tL-yW)ee$E%v z4c`DJ)9Q-kYJcdRHS$)g!uQ`_g5qw)lOLjpo~dcO zq_%o7K;(MMaH6sDo?-a+oSbRhGbx0hFY z-PYnQW4QBbp2;q+Iz;*XqTdP{bep;o5M@3}?ji1xnkk{8Q(L)ZY?NYp^)QzkLGZSD zTvXXYJguBVrcF5+r}U)KHjWj_^JC{$a{+WcV7_1-|7s60>i@N)z)-jOWotntM_!M`&Sf2N7WU&5;; z=jJUf+S-*qXD&wTTc46afeJS_hXHbOsP+&?YrQL-nhDztVh{o@?^^;{*+qpZtVi%fv3fy#?1T0 z;{~5v+H0LTzSns5_LrEkG$>x3DQL~ng?3@a4K2%Rl-8UJ;pqubVBy|=ZSvz%47pCM zj}hQCIJXJ=mCEHD?vcMR{ulkUL?rnmKLfwz1uyNldK^ueS&RY zpdwx(MhU2%RdOZ79lhDeHqu1s3wQ@h-;R7*4nm7GGVLSkDfHIOTF-MeVvzjk-=IA1!LlXj|w9axM-FvI*(M zem<+&n;h9P(Xo(U_)&wKlVJQSGnws|{?Hd@sz`L9(2}*K`-r*3`o$Z~6!`G8MufZn z#w*%7HSSL{-${js)g0~RH={kc*#tc$DvRO*sf zKbgl#IFv_$xv{geUT=~g+T*zTJv`34{cKZzN0$PTGROWCxm9Bi??1R#%ocSIHM)*k zJs-XsBwV}=doCFkG@a7BKeD&EOx3>heL9=At$*X$QR4?&*s;T3Dfw^4AmH$;*E8$I zhGdS{L7tk5>M^NY|;~1PjYKP`t+-^2zekeJH-3zGpAlC z;CYT6fQja2C?NQYdTZqy2Zd~}6nGCZxeu$CTusEVwhZ(vNzq}iQPOLp8s1pDF z%gci-?RM1HuE6WOEc55r`HtY>X=Yu)6(eV_Y~skpz^Nl?-!INWEXaM5=+L_W?3%41yU9l|bo%LIc_98XO5)ef=t3$5F6fA#`67o;EarKez)6G2h zc0e{TF!lL|a*JB#l?+TH%O1VR%88Sotvsrj$-~*9vklq z*8_~Mx0B&>1?;;$hmXpU=SLoskhN^B7A+)EzN3KY&MHmK;npd9x>-B${Ocp5evFw; zT~TrPnE1*!RsId?Fe9^bRb8_?Ii9Mk8LG2Lo~%&UJEh`YCj~gjSV0@<)P7T~Iz#65 zgtc${r`{u?@e$RS_g5J1-=JN>v^MRB)Rgri6QPXIR)uVR*CuISiQ2;>w=tIU_dM9V zuM#^GQ3rwbTu%%|?M9-U0*Oe^B3oM~s}7%-AFbqFpVkZ7%8_|{=ZO)Mh(LwN>VmP0QbJh2Q6U$YzR$YZ_`rUidGe;w9Oh!v_h8`P^^M@>2qgbanb%}4Z-*YQ&GyVthrb4SNF9Y zUcEp@LBl44o`m}`h@cJ@JtEKd9c`R`vsPvJK$O&o+V=P7Ue4ZgGV1HumuYjHoXaAD zQIhr_pIaHYzJT?UCV>L6T}Xv0S_hz#xEjOniITK^bXMNVRpF!Ux0)attkPT$LxsAk zE*1U-t^hQ>(>%rhqVBE3vRt=raYYmmNkx)M@~?~ zvZz_31efOx9zBi2n%3P{WO|n9-R9hFVE3-kRgM{%3Z=QJudr@0+%{2%yovd#dcRYc zQ*59LokdmQ-49-;<3ikjyxDkfhmUbq@ffA?$GBS7W0)Eot_crXy+;JBWEA|Rq5gHU z;vKxCNS>GIIx8c>l!lYXH)^j&BD;Zs7G`Gicpq5k)@JmIA;ER6iG)BeHbfCuj3<`+ z3d_Z8A2VZ_*ghI<-=B4FU1%6OG8EhhIaryO6QSx}?Xi@g;fK~QfyYAg(K{kS%x>&S 
zXM$dCE}b8o+rvpl230o-(kCf$<{gGxJDqUeHVxQYw}jc}2M(Lxl)CN)D-v`0lg{Z* zI2R@Ky>S4>VoZ=5Hl`#r>QC&T)Oc-=TxC>lKe`sB1ZJR3=bRKZ=}f7Zxq)Bh{USdc z@}Q&bf}`*Q4tif`OeC5 zGBBje@z?UuHIiwfLd6!aCw5PJ$&o*tv+TXZ)Ff-&E<=bNBbISJ@Of+WfQ}qZ{*$lJ zRci8($glBLeW`vva7D&0_VS)#l{Lej$7+B<&RPU7k7BptP7`T9h}m@|i`P3uYd)5^ zoa&crlBC#pEVavEBz&h`{O)V)V#6r_#z4nd8EPN3s_PT-fX-}tn#J1tZ z+8L)`>~uNW>fhepTYg*<*Q!~RpO{XpJYi#sZG1gfrwBD(xrUOq%uPKhJib3!lX&b- z3qd1ory?|POQ{Ol)tGDLSo7Yu*AfX2g!?z0w!}t5-ltY9muHXb(~h;B4k=%`Jkd&* znO>iq_P!z_e`3kW8{LL2#vM31AuNDT{l7lVv~m zK}&R-$mD%VdQ0}UcF_f2a{o-`QttU+T5bJ!$Kko9tbpmAJcL^ za2~klt1$qJR99DfVoG>S$}0F6L^o`C+>@euzzH%+J>O7&31Piwv+!A^2I!|>4%e5u z>_!FZ@FvSeiDMXegaZJ{$N|64^u+mX6ODX3y@iqyTe*F2h4;h&sE||q6ZF$leD>Cz zTTQ~Bb)hz%avIqhN(k^UUkz}QUC_&_X9|;{gG}G(@rwR@!(Q1E7PaeJbqf_1;in?I z-|M0WEH!B?lEuGI#h4O zG$@O}0&T7KbiZwrjzm2Y>09e^5zmZHU@*fkq?Y`M={AP*$cb={ZF}nrL|Nj&H+{1x(px5&f^gqX>?WNX#bC_sGND{ z?0OyZDpz$8zdAFSt@2V)s}?0!8z~&tCIKslBz2|x#F#Xl6vwOLyEf7jCukX5GN=2= z0K>BcaO?9D#D`zza=se6Ekq|n_Ku1%W?SI#Wc zs5C)uI_!UN93rKx`NP@qG^=HY*{|0FG+@jpLV>QdrBxobvq+ViMxqaEpDar5 z`ei(e-W$prpYA27<@Zlws24SNu5t&j+Kz6*%7|F~tTO#=$fJKavE(Rv7x_?hpLzAi?s`Z5qrHuM58?H?+apmfEoOP>j3oE+ zj|Q%c0wdq$41THF>k#-O)kUG;Iyi}3Xd}p=O#lK&UfJhcBi2KkMp1f^Iwtiw{Drqj zaWEw5;S+q)jH|pl<3ONp-L11#{IhB<`5C_d^i(jl-LOkpOl4ytz&u^oU$86nDd8k# z#M{cYshm?1-cUyi&w<)s>$*7;&!`lpM&A@|BD142_)Pecr93<%`4O$Hxlz)NAa?JW)^tm7(^9gIt)w!|uahP~ss_{KtF zOKh*NW`84}fNl~&G^VW&HmzIU4RnzkWWKTdI=IJd5WYy_H*&tydReP^ZENIx(mZ~OfH|Z`9I@h(kMp(O1kt~ zy`h~!n~wv58#u8C%4CC} z677ZKTb!ytzEB3CV@C*?cs)vC?Q;7!&MO zx!oG64_2wA>j-)1Ut%7|?q!6~P9iX&B|LpxDF5}l_0(9nA3wW(Jwcs`yZ2 z+jf%x{gIc_6Mh72lq(jE@)-=oNaklwOItUi&+c9!3d9m-qY}*+r0icjM%Xe zb{C_?kE$^@G*{3y`wKPgucG9U`OSMm?03HAk);8lSmH(4x!gGS)wkApEcz}~m_O;2 zlflhIkZ~#D*y`0F>sO2x2X91wU0MgcLsUokW+Tss@|!$qlZMNyZhN$E?xo2hTJ=D^ zs-fMVjiT(srIjvnYd(CHw9Pa7;Rnca;MmW4!_5n87WX26^gmj;pto`@R=ql+DZ)Z) zhZ{5icPEGPZdvWGQXh2fLayg?)L1sXdRl8*j-$#julkqy11;+lB{+;|5~Y=g8o4Yfg96%#HQxWx3c`oHQBc5OSnud|45SH(`jOQYe8JZ48X-k9ecR8mRsX zJx|G6E4>Qit}1liD%NrA!WXp&1mC&RfVg3tj|e2kw&y(#2*176pn$qi#p_;tjFXq& zI<1cTEPJ{%(24_%lm2eT){DHy!+y+}rzxB#U#jR9pS`1O<8Xu1Y9$3ghW%b}AkxXL zzbU+GJUA+wAaFf&QEOcyw%u?0%zG*@}@`~7G% z2_>*o6SgxT@AkR~T-q?N8uw>?ZP7v5^Ql*+W;uT6+ybD>!b8HF0}p~OAMXfTTQ)QH zcs`dBn??(F1kUA0^H~{EZwMD#UdO&%1Z21C?X945w>Dk#%|78^%iqVy^PLBq@Yf3# z#BY-439%!eN<|>f0YTHsXkC<)5~&ORfsnuqOqs34<{WA;arcpIs0wgk$Ck>&Ac92~hN$qAMMKTGcrC0C(duQY&H z%qq*{`QxbdO>narL5wcew#?S2+el^#CtMR@JG{nBZ$%e=3df9%vsRw2c7zsI&H&j| zk;gg786#`PXbx@9l8?TWwn^qm8_B!n?|+z`{PKQoVBof7s|&6sYg-H#qyGZr=9~xy z!R2bZd7_0dlD92aUj*zCIcB`>fHRe^Ya-OIRxhwrnpmG_Vxi%4o|2HYUYY=X{&+p! 
z(a_rG=|V~@#XHsL(}YaF4TGOyKOd4PdPaz)MbaiZ*f;r!+>fUCvEH2)_8a$BAeFY< zh&cOm8SLulo-b3r$<7Z+M$yiJ>4!=aySZxEK>G6L+7mF^(X4+wY*iAWJ&ZoyGQ0wg z5U92Jr)LEclF2&g57_t5D)Q61qrNTueCtxNW)iUnemYZ`SNuL~sA`I%71(Hq)MchR zV27`6Qi$8)1C9Dr9F${PeHj0c{w%|A0MGjpR(8$7;x$P3tY+i5heLG#n}DF)%9YGH z9CmlgVmKze>;{bKPWe14TW7AXg%k$Lt^_c(>2+Vtx>#_>H@slU3#DhsG>@Jc4&b18 zo*Qy=-2@etdUkg4zCQnX;b)aNWSDg~N+Lb=;gi*s)}%S8W&16&UGi6bDg-W-d6z!!^WEktsLVU$gHpDdjce(66WU z9#;nK5T~K+Q02O0fAXd1s;!aSZ4Kw9b2B-&-M3o|B2Sw5wEzNgh54ZngG6^>+732~ zt=VD!@W#dw$Xpw5!nmL1GTfVtuj_E^%wiL55v1s5JYv!gu^e!;fCmVJ( z1{Ww!N1_x{$<6-i&7v;dIKjwB?t|3n;9gha*kKL32y-fw=)t;Zesi9H!qiqhsX<`7 z6@c>790zM0L3mg?C zN@+jiv%8M(j0FoVn|_j9r!lKjXf}g z+Dj%eZ=k=3PSo*p%{vUY`m)L3>YAE^of>OPj-D63?ExV}Lb*XakF9bSnm0Ll9nC`w ziOLoiV#naC1W~S8=hVi_UF7#ZBDg^7)0Tp&F-QV$S3(K;%ZHEjsTezEVY+YMW@q^& zq;h6A;+|0N7=t|=SHaTpG1qi>gOrXM5H#O;p}<*-o6IFs=o9XFPx-*k6@EyPv#gTW&}FqGo6w zslLQack?Z(Y(8J}YxzDu`eo&!*xBqV7T>qLc9umy8A9JlLhf?!GNCBmNrQ``;1L$- zcDY8BB>39$?2|;^#_W0K&57LZs3Kn!uJrnc^2Aye3F{#9G$#Ty{Z=6}x%u|0up)tm zf!6?3PEd5J;B-n9WwJ?+9G>j2ax|Z)P}L5x@|y|W5=Cqh`wU_-nSKF02EV$aRBJfQ z#(oXeT{30_sdES%C z{-wz*T?cTR+s9N?I6{wD7Pf?=%UT&pA+2nMBNdP2gr{Q1arniHX6vR-zeo%{q#)h( z0{G6@`YG_auDNJZ=SyfhhU^OvsmI-AvgyFlDw@{pz5bFNum_n^o2CtY;h$fhji$`5 zg94Cs*i`jSzYr(;uS{uF*z{}reTMRelS$IlZiQFQ%Ni=ABdw$*E49vtk@^f72kX*r zK4H^s-xVNb+%BV-5uM|ze!m-IlBm6`!0hhG?=q+_$}PK$n_Y_DLF4O%1j~iKuRCbj z-r3oCGx3WSBB}m8{a=W#iv`5y79=K?xC=_16-MDNHIZ7$>5F( zCVn}kHU3U+^;h||9+9O^7ZYN)zO(8Rbh*`9B0#-UU?{2!V~|6Q%(x+mPpod0uPXnc`AXF39KIJy9X!vl$(05{7l z+sz~TS12iD+(kno^@0L(Zj(DMndQlgBiKeS>-`d;G_m)fkPUpdE?!pGm60HN?Y)w} zx#CYb{E(XM>A03D8B;Y|=P85NbNWw~G*&zO#D2+N#*tpG^a&gmxE-#o^WFpeZ(p^@ zOwfPp#0f;qzcgYTzx3Ez%Wjyf+p?Br`rwPd zd_^m}+t?_U+Z$1Db5hODK(br|P>hXQP04J7CHTOuhwU>vJ6#{KdVNo>IS2Vz#5|ZS zalD!+u#1-3cyU0QG)<>q@8Dc;8nv3(UVb_L5xZzC@5j|}{w3AeWR#s_@HeIa{k!}X zPYobGEbP8P`d8|Ce+7+|Xo7@U);hDGMla#s%)ex!yg z`rkZ{BU}{rNdV_6asIf>y0)O_GzcizlKgQ!z&FQvyOekFqvFe;I}Umuqt>r}F#)$h zA>cb)dV=TM!8Iro@y53I1TR|}PJNT>Loy7UywxvWMK0VZEL3#qwTUXf9+zZ<)nF89 z({=TtbflZ#Xx=EvvubJtXrSLkQQ(O_UP`k=iu*}Thj+N_5gjt03vo3ycGWo4OjwDW z9zA+n$?@T~mw6>tS~|`P+&p3}2YJ>(A<_9Ilx;7UPL;sKdc16?+>);>Fg%e6iG$nslGL-|)Jb z`LV%L{aE7mcmluIWMl>lNzXG3ZL97e&TfoKMfL`-)5qmEKRI|KqjlE)OA0x^nzgD` zMe;?9iw457AM1ql0gE=Oqk<}A^~2~=Z-Jm-nEOfDwYwZ>Ud-2_ua96)R<6x;M2?{g-D{Y z{;jKDhWWGgT=n(q$+xtv?|7lAv24Uk^xYq`FA3?W+zX-nUoa(~rqA?ny%ps4el?pC zYhW?Sk||r;i=jNPa)FvXKM^IE?x9G5j#64jmFqjqf1CE1%YJOGYp<3snP!scd|Bs8 zBM#rHMpI)$L-WTF7Xv}@PQ2mBAeVKs4|IO2zbL4v*v(3}4`;_PAfk>)`8KhZeLEpE zbwGDlaK=^ol)^6Q41GZaBb_P-PW!%!l76YrO+^{x)#mWsn_w$Wb1B_^ZbU&HOU-e~YIsB0fkU`^EtRszTI^3@ z$1lz!F!N;A<}CGZJ-}I<=Pa41v)_>FIpu^s-GN_bk0wj6KMk9b)DSbrW`U#tx; zu`;3^>U{8w`VT9==G+L8&37ReS=VB6i-%a6ZJQXPqQN}BulZq4sHtVX`ub3RHE;7e z!cS{xkJ09;8M=!`X-q3vyQsBYk2l3TUU4g;YfEP=qNH+)t*fRV2vxZ@`j0sNCk;Ai zDOdmd5TVG?4?-h!T&VfeK-9N}>gPO;qn?XClh2RJIWd?thYXw@NVw}I9o7up^DR4+Vu+k5oN?YFf^ z%Sz&fN>J(nPo}^G@{hZ1W4j7vBc(qxILa2Sl-k#}fBGyDnOb%zt|Pu^g03SrjTbAt zbV2!6>?*hDHoBm%WpjNYSnoPBu}|ZKp>H<15O>vL-)1^@@l5W!DutHDLfUT{@>WXS z{u4v*0w^VU9aXZ|1Xg!F4pt5>3Mpl5CyZ?;%$6<@S-K;Pu%TkRKr$i5Ol@!lc$wf< zaBsRj!D8_h&MB4rz?VTdbU-Q*|H|z65odsiOlr%sFm>l;0*Zb~U7B(S{1YEqDm}GL z_<>;qc-NJc65G0Vl84nm873FH@o7MXVH&(BD+YmC(e~uadZNsS@`x*|XfQfPP;FY2GXg>nbCQ z=0!{wF+Lt4EOUC7<-#7tX`qQ(lxdmkd?rr*QG8RFia|&2hvRrw%K;!9G{o{n`M!;M zj)V0H<7Y9%iNuugPy*QLc$kahPrBr)UEYW;V)!2-mcxV!I{}@-N^jWmD%YB484Rvr z2zK4|=sEOR`$ulNZw+efHXp|}hr{?fC;bYp;-6bvJCF^qiaFVol z@k*3h7tbhNX5URUhmVSP48bSv(p8O1X#ug9bCY#?PgzKh01v)_eUbzxPf2voUdTW7 z3^unUG|$YI7pBC-Y9eC&r3fxIC6b%fXEFNvoR}L!aJQ_+nlB%R>u0-1G$>%|ybzc$ 
zTsD3?V{yQ_CP*)fyGSsXYSVH?n-7^Tnrk;bBQ6Gq*vAY61q7bDaIsDMlg{7~zFj|% zely7Xa3pg>prNQYfWrafC1FV;)sw(DgNvnDv8FE96~dz;7j;ObIvzPQ6lF_2a!2?F zEH|oJ{I9%F*LZQln}+apGRq^ERW675-tr!{^B9lX#pyDq&s^AAVoVnG`9YW`>N74Q ztnFY407MYAamA_Qcy(PA#f~4Yy(v-G4){qhCco_tjQBTedXHfq3LEmk=fEJ&LI zsIV6{$Z|=`sF;>+*mddlZLEPSZ0|t*=>;Rd_Cd*u3XdOem2Ue-i>r<4I(_rbN|D=D z??7v&vgVc2In2lFRPzv&>D_(TXBI3$LkjU+cDLT_8N|mMuw56x)2WKpt4RtV<=|a$ zY828Jj(%S++PMXSRSDy~O(lBC^Hb=bkUimuVy z%`lv~%`xa)uG%pMa0MGPH6!6?*;=RJ4`EmRnTO?b>E%|HE*s_?2Nxt@I-^+eR$@QyYLYsyBVJ0F9&U!A z!Dmc0>n%QcFs|Ud%5xT5Vt+|5@kb4&zWcV(Iyfixn+6L;ac3+h?aw7it+$pxaP4DE zmQ?L+Q5TjbTS`IQQ<@+B1(;>U2JSb@o3;D17dIA*yikCJN+)A@;?TYYDAV7YFb?lV z0H(RS1>RXy6cSGn9vMa0_grm00xnG-+f$I}#u(po-Bw0%(@34WBu;6M`=eOEy)Up? z#E8#4>fl%Y!8PiiwOFqo{2J}LUZm4{HE}lOY{myiA?#8r)R_`1EUV^B$*+(=0;V~0 z97Gmu_BPGu<5#=N|A8i^jZhPQ8vE?WB&43gW86z#zna4qu7}Py7PW@r_uS(=Hn> z3h8YR$Xti@iup!tuPJ-g zb5()@C)DGPz?mLLeW=aicw@S{y4Kvx!5W$mun@Vzr>V=nZI{v7Z?f#Edxe$9R{?ao zFnZ$gsDsuz1cN}qg2zu^kIdVc^KYp4GT%wKur88UVQF3%rX1{tUoDYurT*=>(K|d4 z5+K|#frl|Uiy;h6&z??K76^yRI@U`Na3E@;Hp093XyeR#;iSg_$+NB`Ij~#_j;$Z` z%?_Im3K-_L<#7J$iN#_$)hLpTzLNh1r`~oUwQ8);pI^h8t?2zcmx|Qw zmQ`lPHf@;`fUeyotYP;--vO<`6Cl$*2o#D&NoS2nw_6-dQS0Ofy*5IUH&6!k7do;p z=sQnE^i~R%8a9Rb?;;Sf#q<;=<{Hh{Y3c%D_4P*~O4!^tYlA~Ge=794_H1+myIK9E zK~0UOX^x4iOeiQ10C{B>0)Mf^7#DoySHC87m&o98T^ClsVDMmsPYVz z;17Ys+H}8f6Wc|jpTYAh%0T~KzVBP(uq8jPyn&d&o=ui!^;hRnnHzVbe#DIAYf&cz z%=F)&lyxk1uU~aa{OR~gU^VIdrr(Uf)GIY&0hY%Ik$U&a8TBULs$D>AB3AVGbC~bB zVmRWj_>X;Bb(i3%{yFZyOH4s7lUt4@d)IPOB&|~Wd?

v#ADmd@Fjjc*B;H4OA0q zBqNiJ!7P67`*gUk>PHTlRlX|5j<~7sS7&1|gm3mTZgsxi8GBNJMqQlelVsM~!vM!^ zY2^+2pnnoHn6=BdyVXf@{LVMXy! z`xX&QTLw`3NqGp_;+>uej}%jVwF_lkV)HnHiE9ve>_t)mCYiNuLHVN!%N+A#6JwI@ zsu0zILVii^7nI9IZC10w7gF}UfDQ-G~~_d zmS}YXF(6KddgHQeLn>AiwE87^qJx!ZCkE{Z*QQ>^A~oWxXOQN3=SoL!l#I@_b1?os`2L*5lE zy9=IqiBGc!sKJ^j%%!+bwkZpvft)ytN}x&b3p5i&BX$%RWgxg~JISgTN0Us1+cfq` zYSUrNjDGve2_7p29am&7HwRB6z{R!PiGB#WD;gdT9S#kgCbMcREG#)k6{#8CUxy=G zN6+To-G>)Eygf%{6qW3Np3)zGpzD?Ud=P{}k@g(CUGI$t#!i-}Eg=TwElAiW$cvIS%2yl6)u>-| zQ=z(pvmPs5l36cd*fXSbWEAI8sO>$Go|mP=ymXvfpjA&opvQ%}>?D`@wqmJLi73;GV-_-ZzRh=jj7;OmotS)f3v|uf_zX@ zsV8TPgS50mSowz0H4%k48;Ag*t*j)Ot195C&zo2nsvf)If-B!7Qr7u;T&99rE+?x$ zn@7LcCC^+iE{EBQ2nAxKXCyXEc}^E6Mu!U`6+}SEMx#zSom0@xvIjl2371B!IxjXV z993qa#oLQp=J+YzQ)~vF%2Iss6&HnepqKjl2L3!D9cks~0m}uHxE}oM%@EO0(Z5u; z+``Wj_jqK`IK|iRm#~Ek4I*xqABL{pEN%x)|z*k zgHg4%1|>)3_m8K!+Kj03OY}L-F5X4E8sC|~4w>9F4)VBjez;|!ZBm_-pr0+0AbbB6-MPg%Xt4%6`0}+`@ko_UzjbjkON}yH z`@p^MwgOss|YYpDRU z6gJyv6)Y0#i~fw|c?DB&>K?tEUej52PXHmc_BQ+HBeG7$*?YYD8oRqE0;b+vTJHy+ zBsT{~Wj}e`w<9_9Xy+%OQQ7%<-*dlYX}g)^mQKqBH6|735}ga8_q%B3{+>z*08?`2 z;7UT%i-@F?DFhrWX9cyae<~9Tt4o;q;RHX!b@n2EI?l)gP`}LR?gw@KDxoj}s);r~ zly5V)3QtRDBeyt*MaV1eRZh7WuiR~w@Q*gGx9`VVqYEPDp!CnapJ15pCTwr40GdS9 zn)V7Ue_8Xp={LEIwZ=iuTnSBqs**w-hw-}?fccwGpOdgW5rHO2P0B9?3XUv36V-F* z<}K~84q?u+%x@sPdw=fY)HX;Kv{aNvEyg+U!mGnC@n0iVXPiLB2_zCj!rx&_jSD45 zL`K-w2DisX@SOEUV$924Po=ob{L|mPd*{bgt!(7Olvxe6uF4KF)2FAUOl-hg;!d+wK=jx+m#VgGH4KC`x}Eql)uf|Rvv+NIUCADpi=IxD7QEue~= zN%5s3Qj9t7?=XuoPzZ2{l4$ z;QEe*1B<2;AAaoD+h+oZlcK>US!H{1W4r10h8SG@AHRV^U3x~ze; z{SWfH-nggL9wCDfp5YSTe@F-T9KU=Z3`%PTKObb+`r4au!}MODlV**G<3QbRj}i zj(}$7nG$v(;=PdVH=a*~L43StTFIfNtZAnk*>GdZvmL-*o@riM#>W*FgJXOYyyiRw(4TYG{b63K* zw3ZmX4RHq9AwnE9jItm4>)}e9*-LV99B|WXEg|jC>4W*f;dvsaJa(uPK10_m-ml-M zVzPu{g+AyKRNhVJf3KvzJ`bJi$#~-Q;T%TL*-qIQGXm}9Z#i8ZfUneyf4eEH@uRjo zGs(#vr@~C8N^D|#e3%%IA&(`|Wkc@HXW9M-Y~++$1WA{I`Cq3dmLlp3^!Fww3RI11 z?A@g`vrLccb07K>vNxQeMrlxJhD+mPyZ(5Qrk>-@Y6q;E%(Lt`g`|nI>I@B`G+;+J zPm;BBb9bxrv5#7Ce=hqNGCA?T-0u~*tee|GZe98AV|rh(zUt*CR2kc^#1y=a zVd^sgXZx4=bIT%lYA8mPe?FdZmqUv=C5N6#^*wOO^zV>FQ3+^UHMT@)%CNoGko!r| zkwkTK0gqI*qkGjg@UgzIJC8wvDqD)02)BXWb#PvApNQ1UZWOD+`c~+~fPXNrRpU?E z!HYQr*`{deGjk{LE%w%^P4dCdSNGZ*pFEm&BdFZ}wa{3l)$4DtmEltNQ%P_xHQlY^ z8bcrp`m9HU)_P<`stQGW^%s*|(%g0k_Mdk;LWH6!351@A&~j?SnNh5cgucqs?$2L@ z^ZNDpIq^E-N4-1OVGlOSM*y{DA{>`9L(d5U)s61VOq8#F8=s)QY&&xtkN#8-rz~SS zD!%aEX8Rb>uQiEetGn`TuDx=6C3F*OPYD z-SWz&V_C@Y#Xz40Kl!=sCcV6Ag|=&SG~48;C*h0SOD$&lc`{#YStB+wMi=iL;x$|ouE=Gn+XgsXGQI(e`x z`cj{OpjKlaDB^|hf5xFX0P2)QB#E@_vS``m+%+__*SsdieUC>n3dC<&vEu+v?bChq z;bg;RvspK;F;FqMz&5>Tqwfy6kT2>T?l|`Z206C83&v(j-@v`yhITFU#mYww)tQ`t zOavh8OUp07ZHmUJOtp>ssRlxI)Ii<9Mdy49rOfx_!I5h&4S!c8~t!k?+CPQT}4?7J=Hfgq|^O(l9s&z zcCpqz;%eJRH#m7m4p*P!t*WYOs21arkdR36juN24M7L&kbJ>C?=r<muBEfP0#vAv3i0K7N5bz?xp?!)UPPqeKk{2l`xIwv>o}bTAaW7Z^ zSfZCk=*p&Vu&Y`SS!BT6W~uOen#duUC^Q1D4o%g`XNNDLcq#eGS8%EgO28c)sQ)UM z>%x|+^ui0}(#n3PxYhJkkp3A`o-PS=!4!P1Kibb6{_h)S!3f5Fo#=O49X15+99CWB zJ?913!^Ph-|8KTBhg3dps5Wo#QGYn%IbjjsE;M}^Bj|wAE`4;5vq)$@^Gz$H0^^6( z+r|YT^*MP~ixCrEY|#Jz$Wtho3boC163+9Tf6i5h{Eg&J%=l!~c4J?@fx(}>f%qPd zm~xxU0r$uZ(B_KQ5Aa#sLEJqJ{zQK$7S0_$?$ZjO?ST%k0MSF7)+0T7I$->eAeQN6 z-c{y9A&MxQq1N~d?4ff-1gk>K{hiO`W^gdXP! 
zEaVjYJ6E8;cq~qYM{<%c$>1Bz15s?ezq@S=P$DucM6eF}Pq>2xve6RBoPf~a_NZeX z`AV^YAb9A0a_Lc^fai}dv%Ff`DgWeHf1@?Z$s38TR*FG&pwuB#yhDg{I5sXs5c?<2 zYGj9hJ(mE%Y<#ilWnxS~YaNpDNNurrSu|L{{~i62o5b^o%^C0;jY}u+1I)azOK#S%?Pb-uwnMH8;W`+phw!iOQ06RZ&jE|xl z9zmkmivU&c(>yoDBP6qK>=hRZuw$ZmPh{|8e}b&qaQ7|F5%t6$SoNvq4_R;z|0Fbp zd%??GjRdFjg=E(CKcPYYIFEmdWAW#BoB#g(tu`>r1)y1JD>Uoz z4;IKoOs?Wi{Pk&MMBM4(pl!O*@BO=zmn0(;EIRgiw2dKf@&y0>UOQ8(QCC z-kW{fNa)oBWzimnUg}wR9LKRJ?t-0ZM55oD!ze!HiBAvNxk?2#gIoIe`nGka$+ZF9 zqkw!m`m8v}cO-4wymeFi==v1Fu42fHP1f!3L({MV3IaVEV*6&obLn?}%6G)kjqlsP zrh1r`{l|`O5Yl&u|2iE0Ii#^|R9G<4yVa)uJ`(?U0oY&gWV=%wotMWB#)1!|q^1A< z7%*nOXVOTCdwCRk$r9K=!|^F)((vUj10M}bnSYnIhek)Oz(@Vh(f^-i+oP+mt>_)cZlgeTAKNZ6w zMx3%Tk%>L*Ml*&TG>G^Q8O{2^-iv={i4dtRVI!!Qng5C_4d`I2iZn?%i70 zKoARVsy+SvQKOuV@z^MVk8EKU?2~jw=ifQPZVR-ch06Si!`zEB7{&~eB8MMnmK#Hd z!5vB_<6l4K(km(|dQkn{zn(K8E%QIjtBj%jw=i=Xo(qJ)HoNq<*X&cVJJ|3N#cAVK zOqg&CuKl%hAU4YTp~!EQ7XGETMlI(N&XWr`R?W_uau!J_x8*ZKQ8Y;Rw{JMh2|qn$ zU-Cs1#Kv*esw>#2joC-|7~6d=bBj)4=QtzB)5Q3Q_ zv=8Hq1jlROHfQwT7rG98Ioe?4$1@khe^jjy&Wa(?MhqT<6o)p8xZh^51!E0u^W|(!V5y z5HjQEzrRKWLu&z4ZAIk22r=ufxo>i;ff{78_n@Brn^5Y!hgobnc)z^7TwX~j{ zUb-Xp|5x=iSk@U^TIO^*&>q#sPsO?ristzQi_fcY#EbF&_RxY;Cha^Yhqrg*5pfmeqxTd`-b0E6uD`H-^)e5B6fhZ!TZV|m5)fP39-UXej${;?@WHiZ ze#_Z{LA9^|q7?)Mp!218aQ@2ePBS#*R%q+MFX3*m$S+eauLvSz*XoId zP+k7_cZUC2x`oxtSM~JtU~5Jr2trS(2|*PTiy;o$puY~AL#v=jV16p3EZjIsq{lp8 zpT*Wtrlg`ev}1)8-laeT*6F&6MS`?*c7I(BKobHhMYwo*HUHdE{#&=%e{GAOVtJCV zYDP22MMgge7HB}|vUO?;@`xH^tQQF3~bKf5)>qpUjmVi#c9a) z|Hf6BU;OvG&^r(J2v#|U>92^C4S%`{B^xQ!cOh2Vpe_B2Sb-7_3amEJ+y40sunG2u zP9TUcs1*J*XG`^0D>1P=0Asi(X6gSFf~~EIQ%_q=|nhCXcj(LYV!v1 zIO+B}29XG@16shog4e?0Qy zyf+063(!Makn}^rjDK`+XdwOIb}K(WLbcc!!Fbv>)ym;T@{s1pxAP6&6c2|9wPyge zUE1YM_c<&kLKOkMEHG}n28*I({w32w6WHaN_WR|Bc#r&_fN;kIv$&Aaca9A2{icGc@wkHgHswM=PD}dMzA5@5l+REO+Dlk|)PXW8>=mf{1CS9O_{;g;Ie>t^l*fqk6S(0-0-?A$qlOV#4 ze{F{+fWXmx|LisTI~@JTeh`Xyz^Zb@xzw@5c}}FdXcMY0bbX`1Tmw_|-|TAOQY=NT zEdU9QnnPEd->|p!zi0=MVuN_xdr_OAl@^L)=#tG?FplJW=OM7Ht}2flU4{3spQGxY z1D5k5$Um9vezv@rJUFtSedZV9T`q2S82lYqIM zGqFjqp(QO@dQtfkK4QR_PcL$bAH$4AwkaS04KQZ-W-wU~FM321j6lbQKxT_YP#nZx z#1hbf|M!|eRO2GZb^@gaP4A&zUxxP2QsD0|LPAqqPr{SvV)aTqZzw-+{IhZJUp{?L zOfbebOjk@WKhLgv;cttH|MH3ko`V4}B*gvG5M&+>ZqW3VQAGa)Zd6+Kt>X0vv`hiP z^cRaFK-XPSF&y|`3;eI*cY)ny466#Kp^CnWd;ZGz@4bR*fO1^+Do^tgE#JgogLl3)1%{5uCna_n-znpPq$%^jLJ{;ZW9#*&Hcd!QTvD zEjnEm5Aa5kmPw({-q-(VXH9%mC;#ryc=#8#FC7w6#3OcGswpaw05vv1i>J;9bc#Q9%@UkbPq`M5>y!twERX z>4DJTttv9VGNPK(3>;(5+idq+gWf9dk(t#=x$;f$1qA&A(9<{xlTc*E$6Zxqg88NO zU#suMbK>aR;$D;DU+t21ber5eWJz@&c_Ma!$2^U(2b|4cFIWSB7t`vHYrOXuxcJ6v zS@Mo_ORerp61K)Q&Cx&xu%I`te~B)ckK$qW$#IX!f7v^Cm9N`p+Vo5vs-mJBMlIm}l^1u!QT6 z+5|tRZlK+uyEZH(r-Bh8+{#JT%;JxFa<125Oro;Grc;>7uz$H(=7CbdSrVh#DZ0#4 zBmB?%Rw3uGo*b8=8Y1%7uewe~#liaMvkI>)TPcT0$r-p~le zAyRo~G11c=aSM8kO%{aW8|W3rJ4YYpICkn;Y!zjNkU}SExM+sY{C+)H%RCVFDuxL~0g77iMJzqL*nsZe{{^kpYLg--+wC!%L6hG;dP}DgGij$Q2TYLCZ zf;Owh+m@+&<~A~{jIUO7_2D<7N?px-T*c5qG9~EOINBUNVdaw?yJ` zfynQ4k1;b`%1jqC>q^O;Y~1gXHV->y_m~?8p9{P2=~MUW;x8^1>*;(|O<;SLwfL^D zBbHKCxt;5a)Fo%0DE+?eA32?Oe{fis(+5|s@e74@VoT%2!@@-lwLPqrEO37Wo}>p1>wGl z{+>mln+a$CarepxU$8v;c{Wrm>hdDL3eQrd(2Ri~6%A(@&g?JQCuOuM8;C7k8;}bR z7lQz-$WXT;w6ZXU?D{Qga5V4^E+&w%O09K-pAcASFV|~8|hZ7NC4I8L)4GvJx_0@Cj z%V45cFTOm1;Y@VvN$pMfwg6uD|H0mO$5Y+E|3{KNQYpKLqEvLua3qu>vmzs9MMhR3 z<5WgvRQ8Gprz3>yl@?jqdzGwXmKhy>*ZX~{`@Z{pzMs$c_xSz$`^SCDIq&ytUgLRP z*YgsR#TDX(59__YqHU&pI(~9s3o$_m_obC1(_i?O_2bsBP&fl%!mcgnp}($vj@ili zOV=uIZBj&e%6CeT@td(R{bL`QmDNG=9`OI=2X;c$knnR>Ct-&d8*veA4o=!cbCqwt z)Ey?Z*M^{<6jg6t8k-%H9#xz0X)e&dkbA%+L>7dkQOeaS)n>Bq2H6@~n4d;&tNFb+ru_DQTXYRP;|m?RMQ08H&k$ThP-Y7_mX7 
z?(Mgw1Kf&jkr(|topxP6p`;!3nl<3#p0vSn?sJ8L0)v!ngT5*_&X7h~zgJL~ZP))T zq9E#L7&-VE`?jBcd>}MwvN(Dy>+wIqwKDfhlm$Q)&f;}K*=d)5E5}>0XqvbFoHjVF z11idp`qiz(w-2AQ!_cEphmPtq)nl)fD|0|PN1=#e@`XbIwoKGkG_ofSLH@HrBW6r( zDtGsMbJ`{3^MW2H0dS!$o7FuWmJatcQH}ZQ))QQ}unknerKO)sTI}J+kJRm4`YG&o zBjV+N*vA`_G=ys$>R0*k5)_PVz_R9ZJg)FX_wItipL3x)18~T);BGAW{bp3h48!Q4 z_tL>#|8t(i-#vV8m5b0HJT*PNb{=@{pakr{g391vZ02(C(a*Ei`x9C)D!q}t6CgGq z)WJa4!TmBLiJuXqf$7&hCg-sJf&~Ef{J#zSF;m9ys-3hi0yD73Ds~-??jma=X1V#) z!D#5PQ4QWxi0mG+#kzy4e5q_E8tL2YHa8LteVsS6ZUtY^0&x)a^RGrnAHM7tt>EqT zOWD)>fhNxK9RhTeUvU%)0vJX@&W8{$(&3J=Hs+<-G^TWvO}mT$Ym{@zO*FW?fl3Cl z6VU9ltiJypvLPD?9)&+c`3+ zlnMJDrlLcV+4&fiqqvdcx}lgbx@`gQI2^})O>}voe0S_n(M8)>u#xhgFqQC$etFU3 zKPD3kAF?bsm`9H!cNE$xnZ=)hLk*o->@i4}ESZYWKSPRqv);&A_TIMruaI~00&~uV zkw}PmE9CortMfI)kL>PbLL%O#jA|})(7z+^kh*dr_X&8UwW2>*^Vct`Kl^EL@Wq48 zBNO>D`JWcY%l)?#Ure`G`*K;7u(W-F&RsIn-O&bqFf~l6#IB?d@MM>7FEJRh zXX5^l-oX0L_vR#S|r4kq`dwzKB7V5sqp zg_p6cjv&8e}G6h{hh z7a0q$(PRwKT4_n$l@UGcPvEZ*=skkTK*{z)j|W-}nrVV+R4 zQd_#(H8I6x$Uy3Ny+qGdo$$zw8GB}-^I8nTEY#~*dte0k zurXjE^%QRU0ZF@JKmZg{1i#j;|M<8b<}yD? zg)6e5$GzW-*=O%gTa@3lEm}9Iv*>}1&!9`F%Y`u0L<`n^ zVFpP-!<^w$`@gG)ux`y!iONK9vxQ0j>fwa!$0uXk;-i-~^fCCwdbO+~P1 zzkYUkJAC->H=4V#bALuK1F{mZrkw^JzF5a{tDcg6)_O%gQfi_kk$Q8nU~fHd|P z%U3;MwXtnvWCWj~klpX2UOuo=oHrchJ`O$SJ*# zUs<+A@S?X>dsZHU?m~f;yZRjKn&?Rv>UdjA45eP#Af#~)C5TIE#)GE!o`m}K+3vL5 zf?MJomdj@FWf`uok;-=3ujoyL)9KcWQLYs!EERn;uiL_6 ztXfy)Ri@vvSnN8P-cvi7>vArUAnstRfH`s?@5+XZisi;{Z8W+I1{`tS`tK^SdLeq> zWE$PCHc#ms~kdC_4{dzGRFjkas_ULpIp;fWpwAG12>8mK#F(ii2P zo7m*B;@j0?CXo5l6bdF*obS+&T=BNfZ>AKbCaic1aVt0}5t@Up|JW;;BDO9boRjUU zx>b(2TeHZg-?aCp_qGz_c-kAn3i<&tBmP(2R>srb)oCuYSG8j1yw>ItEZbU34j`>- z^jEL0o=zlYWEGgcp|)piMVh8Trv?8L1LdB%HJcMkq2o-F&4L`TKIH)0NG+tq@r_*0 z@Jdim6ow9(ndjc!DaZk{)gwe5KbkRP`EdcrTmv!-2(h%hU*JOI_~=G;<(4~i?elSaPYt$qH=CJ}Z4$E1wyI@pskFE+Q21KQ!EaQ14SIMi z5~BB5y{&sB;(U)V)v{#baDSDQ*3q0N>|%7OL^M@jOc6ka6BRqlJeDGWb-&izdklv( zAVr8c+%6|wXKBNTxp)qxrV8$qzgdVdihGfIH;*LuT#8tJ0NCZd;P7(0V5!eEhbiKa z&UP2Xoq)_qzil!qVorwUF?K)+;@hmA1D!M6l|w7%2QC+@JnwS_mW3vT;&GX*k;uJ^ z59Y_3l#u>(q9!w5zHytivQl`8<;P(ZyFi1*FmTJXdN320YP4e`JfRAB_*vTHM^H<0 zftq2*V9=BDrMiRRPoXQhep^P#^h-NII1Yy!gm>IXvPrYn$E-ko-e_do%)%lU?(>=w zseB$G-TvqdnPQGz=8G+zT80h}XxE3HLZ)6ogLK6jYCd4%v1expL*o z*XehMy?!izad|boU6yBAX5gp=UPj^gkwfn&Q!KXVmGl%%%tg;ktewUWUUs_t5eDMfb&d+ z`@`tPUi5GQ~a?!($Nx6{ZZ$`1UAz8kYZNH8KdGSjRtY9ex<-3 zOPFEzOz0Y|%o_1?u1xjy;#x3}xh$|WG@EaP)N71DiARmi{qpw+t;zEH{F~F~;=_%u z=9-_X*R0P-pBU_MA6_fD^tv~&AtFDEo>Y)@mQ!5UW>38fy4EZ-LWeBscC1?_J77K^ z9RAGEw(MaPEAqn6`nV-!TY!(QD&H+^k;1Y{p{~kCrqzwtpJf#{TyhZ6Yv-wD+*B?uug{ogzPv32U}FeJQuZTs`PK-Rs%Lq`MG#xYA~y#_>j*$|SZObaRv6!R-IP2qB z0zZplVg!!R5YX&fYCD*^u1S3UbdYy0d!n?T%Db_>%c%lLzQjj{x}yiV-`rO4+1&A= zH9BnMa(^^XJ$A*0FC#4yMXu6N6S^fl4wQXTn@;U$A1V@GzBPONf zKj7lc@V4k%j9i}D$$gzWcW~0kWxwlCn!#p5C7)4G3hC5@h5TAP#%c8yy=b-j`gmY< z?=@O#f=tOg_^5SAW3y_c(ZhU|f&CH70^6Q^b6>NSm>XdAi5EA7>R)=5mD~=?@8W63 zd5L&X$a&=Qw1vFgBtF zu`D*J$m%qiw(5ZfH&c@QQg6=ocnt+fy1UG;66BfIv`Zb-0d}Q{T_a ztsCFzFKc4xHFPM zeAf2mUAcI1BO7CcZAG1d|60}4g&UY#zK&Tbw#ZA8sZpDE73(1P6Id+e!HZjdIU_T{w+{$0&99Z_VwYIK^aMBT0et{zdKU=%B zyRgedj7lMi+fYW47eohNem695kMLCJ8ucv~g#;ho)xpy?x$+Se>zfsp6`v5yXGE3^ zIJ~-NY^`Hg*XTKHq&c}_^WsVFzFzyDin~s^^>laNTYM97S5^}-O&CelbsNoBxHEBDsH98(JO|ab?mG2LKQwpkz*AdZAFr}?lH}Zuv{@_i z7k1}~t9Jv8-D8x7dQW9yP7*zpaJbWf_%`c};fG8xuiDiuzjl2?n%tIo7I|MOOrMY? 
zI#RvMH=rV{6nA77zK2kq6+fMx9qy5FOkDDIR$FhCtNO)~ zFDLe$GYVF;oQmw8{655_d)u~gXI*7&}(Og&mD`My6kOQrKjjNLwo1J*;p<9y8B~S&g zR8a8f78M~JuLT|1S+IQSl%gYts0g1-OyNg{wo&stNk>BEI2R=aZkj>Y<-O$-^$tsh z{Mt1KUROT?DImx1(aOW-UUh}{HXnB~=;p=6`w~nw9r}*1YPn=YdTe~$I~O^3RO(c- z*G+>gEsY1Pi|)gQwbUiu+l_|KD)1OihBWNYjm>!3oXh-+3m~mldh8pBJ#G`Dx%^H? z>Tv2Srgmy66CTu-mE$(YJKUy%>wU3$9Y5;(`hiqD&~L~!cP8zHxGu08iLH}Tp6E@d z+TgmEOs;Px6kBAIQvAwYrzXxVb-4sN^_Yy$IcB=Fs7roV;RxRDG1Enou_zul$hS_P z8pts}Rhdd$!g#K48Yn2*G_@Qy=ec*sj?ER`qmN5ko#L<9WI+e@bCJRv-*@1w&5miH z7w3Tkg=3T%Gat@6rz*=jV>HMRqo#2O+eI0^Hh5vtmvY-JNuy%G5XwTlZF~}Wx8>tT zMitiG>=`xW?7UyKztXbjN{JH`&1H7%CL(?yirzT$@vD8V!J2Qn(j(*$82_tY+bq~!a3IIIzA`0 z{b|~eiu3B@v5_a7p-kqO-b_&U~x_`{;Bg7E}22wqN5)d6R7KMASr-0jCiMmUOimpIKavZ{TX2;gp$n zO?_7WF|(K`Jma~Q;oZC;Kqfg$tWJ#fPL|XY(k{{ToP%e-$y9T5d1erUX67**7WCzU zwgdMM=(CR8OsTn37SUg=u5RlwzxqUGd}(WZ5FsS(Ov2-)1-*sUq=w>l^&ngBjI0S> z8#hlm8z20@z~rjH4hx$?!jRnd1e*ac8oHa7QX^>Bj-BXGZu4|6yXcUf z#VwbXuQ)KXQryDXjzp$+#y_;~N$u!3!|=A~ng3Lp7YSU7t)s(Il4K>+K--ycZGONF z@vHj7?qT!hquT1V`m0^UTb)}Kanm%qw-x3$dnYo=xAQhEZ2mSVzk1$MipV^$=r(Jk zs_LJ1wZoNxM`U3c@hXW|UJw*7X6Hz}JRlKP8H>4s=dg&Cy>8dO@S0lDv?2b2;AGyB zBTUo+?~XcqJ2~Zg?~EsHkE1XkJSIhzl~X(2?mxEhGLTx#mELc_x2DI_pux^GGkIY? zkA11#2uE0~^*Ykav2BH;^r7}_VL#tV%LVGCuBsJ!n!eY;K^bYaR=O)Pv!cr%D~^;F zjJ9T(dOu@*o*ZKPiu34o49|qy8)F;rINZ#gCb~x}V`t(4j(S2IZ)R}KoiON`+qO!m z*pVG;oO`}#aBXnO{}_omA}{2&ZEi>UVn;95WvuZ@TRGLJ5?K2z*jHEJ>dd}zf=(!I z>oeJ*9C%Xp?7EN9D7}~yS_-0R;R(Rk1yjZJ`CHDRK2JLu2_2laEdp$!`M5%Fui!;q)ay(`N)Dff$neSch=VVR0UD&c^A z1@XB|{P!Kbj+Ua`Ih9#OyYBbDeBbbxA%b4MeQ1T%sbrHSX;UqDAlCvtCK|AMmJhen zm%Bs|);?n`w~wq48B7?Podcah##i+%IlI+AKD?k3?_zSOv;DAs`OTtp8IL+}Wskk; zJv|W)zREYsya|ACt*yF3y@YbD-Vj#0!yU>;OLxW>Wt~|ywEQe;@N#du{z@%NJV}2t zFLTv}`k9d+R%9aR^Q82B&o-O!oK3DRoUYqE7`vRtq(;4*O_Y`{RlIDT8Lu?)>9l$G zH5`j!(rdVj==}!*6r?@hqYhvGwr2A9seCIT1CRmeR2C-bC1Wc1p~toR#$1AGKHa7| z=8a;Ipm|uQ-W(#K+aCK`)9`6#`haDlw`a+%y)x=?)Xp=P+9|?t8#mvhHG675P_9?@ zh}L8n9Hwxf!BKHvnm(Z9$M6ur*O*-x=d`XJeiDyx{pJ9IPy?n^hq;{iZEc<#gA|7q z^N=Eq=ElZGbM6lMC?AzN_rOcXq_PwM6^)fNmN}bp8gdejddxK$X|c0?sRrgwQ6OJQ zn=r~ex$4KvD=O#%#3s_p9Gsjfvi;s8L#Hl9Y?ys-2_AZc;%(}aA0Nx5w|jf?jxA4k z^is|Dd)Py_j!+&scikzDA|bI|gy%*jXH|kYhRd!yg$R8uitZXOTd#|0yd)npUA7LM zFkhMbf^auCso&D8>(loM53PxD56z0rXPpA?OC;Xkh_t(3>K2zAm@7Xxnb%>Q11u9j zG3@SNzu#u3q+Oi%F>xim`2l_jvf~Si_uQ-fN`e;SB8SCw&3`O^E%JBcQS`xLC?B8Q zLpH>Ox6=5TTI9z??Udd2_%3%Y9c#)?i2;i-gF-A4 zn-~CJXWik-uelM|i*0*8AY9Bq!AX{pG|mZNVG<#=>22>1T`0TUeS0(Ww0z=L<7h1A zbK(}f3e{5?%58Ko5}I>E)+5x02w?D`_F#o!d$^0|I{+Uq$gh13iPQ^uoyQ6<4038s zALDpEqN06i`YACZh->j?vBvYvP>&{M_He_ABa`3vo-=}BxPYXH=rHjvD`x+6^{2I~ z8G%^4`tgYl`mhG(*M`{zHR~l!nJM#-srWk1+i*c=(M!{>^_%V@l?_OIQrYF%)-0d0 zec@$vHh2Ttf^vCaB@?^a`Qjd@3-HIdN#fI&sdNL?u}XSnrX2v7kM_N=>B+avHoDKo zkD3aDF|VN1(;NctVxB9`(MR2K*y{N;85J1Zow~#uq~4T#7=enM*XNv{gAja6vFq2y z!;n}q-?z;N53eM|Ls%*OxyZz&X#~kc`gobQB|F$s%`**gjCBTK+tdmd768DXvxt_T zZ&Wh>sEN=Vm33tR;8S)XJ_%F)N+hdvH$t~?L_$Ihlut#%X7Z{$HG1`q%MXkBk#0v? 
zSrEJQmL~ehl}G#aa|CWGSE<1Uaj~!uRI`>GccI#{3!4}dV4C!B-!?u?_3>BD8OLc^ z@PgKbR{N3N#%|kGZAu8}VockuIYsTve((cl*E_8z>I;0>L<8pJ)4WFTH5V5%t8;Sj zwe}XPmUGBOz$I>wKCQ{`7DB5VDru4ZGD^~M|MR%5IMGcsx+MZh8z{5k)9+Pk@3PVy zxQM{J7FTR-8I3}2k;gA!(hGw4jUe;?NTLDS!uHc zFC(@0$TLx@BaPWTATP}zZDnzv2`iS*g2J<+&!xj?O%Do+rxZpgC}~<%e6V>E4ThUg zWD6%iU$_G=*d^ZSYNxiX6+d_wXT8ZvS20nI(%tgw;OREu_=py=8#Y+F0Y{iqI=+z= z0mO)>FG3+(bIEo4ozjj^#pQ%FgY5Ux=<;{}ye+I$@XMgqHq>-)f|GogN1h`D73*TS zh6ar*R#p!HO1xrke?1k{=vT%~09;)71@FLJr8-L3jT}t@gICmqWR;*tkku(zeDDYl zdE8Iw3pwz>_HY_9MED>*{$b_q%|h{A&lcI>Ffjd3&w;em=BB2HW95xE$l~ph=%96C zjPtq3o->IY*PLO@WZxXb&@BEQmWslNS8YAco%k%tlnJCo>shQl!x9jf)<6PXiud|j z4qy-65&|B0PM2WydK#E}~cshzNSv6$(eZ-H6W-7vZ&KR)?+U5=u(lZ}DQSz^O zdNQ_6SVoC_6RWq~^mzoyrT+^@f-Jn8Zt02`PB?%RI}FiMi4ybH2P zy-u^KF$@lC>Q(HxMiT528^-X+c66JlDb6|VAW0TqpXnEK{8GKcJ^r!I7JRDZe*T5G zZIX^pHMDl(?tnZNG)uIyAW~gTbcc+r+J~w<(Zy>`yQmDeH-@YxK$9mgw&*8Bomte7 zO4OjLHhm=5NJaR-Q$fgkEa}*`SDKa5oSp?Ap{}xtim>|yU=?noMZ*ynzT9Ecz}HAi z(^AshVXJ1;UFxlH4vzaw-X6c;OFPkqA z+pbIqk`&1U#=}k5iYeU)EC8Vq5!>~lEF_RuL4M}O^+`U>DJGZASs*eh6`!1R;ZMX7 z86|rM2h0t<@n%XJa5pyHGHn3n=mW4vkFVbr?)&Jsf$}STr-@ak2u4!!l#NZgXALS( zkoA5A`=+_T89BSTH%$2RhTj)wQYP4)&SLDi&r@Maa~i6F>y_3rO2H=9RLVC3u?w4S zRW!b^6w=Jwa#I-{@CKYAXjj9BMC1-yfjl9frD{f3K(_ho9^K!drbkuGvUlr9r|=K$ zH)^xna5t#t8!A_3jHgu+V?(yxA?W))YK zW)30&;bjPdAgBx9@5SQ8PeTwYX&t}4=jyD^$# zYxc#ym56MqaEzK9>E{eqH4aI5ny|1%v+8k3THLE+2~x{BGLekva2WFu}2b`*}y( zkYmGz=eE9?rWQQD@_CCTjnem~=shp#H2O-|aMpC)TPSJt%A+qCkJ>7u`owdgapaiY zyTF=juAeYzmkk=WpNqZk);?jAsabJ<>eQ-DL%DoCGk^b5%KP^!CHJ$)DzeqrCtW&l zZG8%822G6u6~i<#p&)uyA&^!#IWDzlB4SBLsX(%$jJj||#r4d+?YlHRaTgP zHa6#&e@P*cz1yV0ZHUf@MmJaWNu$)Zo^u*wk5-F?T{ZelvH;M$6EjlYo~w$V&+RQv ziK}(97y9b?+D3FWPn($cz~gz`lIs=|r?Y-wr_7>iE3qFHU7!C&bZw>6ojQ3W@P+qe z`5_Z0??VR{j@Es@8ZEHaIDITOk8${F%6pNe^U_b~Kb@6Mo(;LES6{=ez;5icBD2r( zetLcBNRH~RCDH|=oAk_7;roLPECV&2SeI73mMc3bZRAqp=~4Hy)Oox)aIL}-h=D(e zyfYL#fD^XC$E-h8?J_hh;aBs<-^V_ps#|g!Z%Xzr*zsNb<58XP)CmqBQ^~p8H#(EF z8{8J2UW=E_oDOS~bo(ed4)zf8&1#mv4!t=Sm~Cdzt_iNd^6pPT zTx8)Bz^@t9$VFD*e464QpJKF}cA;{nifZ2PEd=8G-*V(e6kg77N|y$=?vIO9ap9u* zOFwKL!}DXxw7T@qUNGajLGiS|k4{JLW1$c>k=P&AxSK4x4B0@Ey42&F7r{gqUi#?% z`}$F~(cFOJ4NH3BB^VV+{vq#CgZHT>Hd35J${}&TacO6CKP&#dQcoyhLSKZ)#Y&FZ}*(wM61)yt7iQp(p?KjhiBm zCyszghY=dRh0Z7#_C`jGP~X-L3lE58&7hRo%+ z>dnvG_=T>#w6f_2yQM`U3Di0q0KVrwjJmP0u_<|Zn5&o?vMKi9269@Jr2h4^f&&v+ zcv1#q|3x5ZyRCXG%(z=6Yz;kEcfj~(6j7E&juq}D?N|j4GKwN;*#xV{nvz@E%Lt2A z?*K%pL-_WS5^|b2b@sRnrRuV(2LDoD*hCJB5CXa?cejJNH9y<*)6}%yJ$Nb6AT+pR za5o&ThyVSgJ$VI<3{p%gyu_SFPFuRGCUOjD!dZFw)9c<1i>~r28A|wJIg`K-VUxQ_ zwvPGCDdXeg0ixhjS66TD@1H(BL!&#yNe5c|mw!zKyZ+bBfc0}BWkU@APXjwYFYj)q zzRcCmLqLLgra3y^S^;$Gr{4jXe~jR2Vs~ffUT$vgyVLaK%Yx+_r_7`#oV@N*M1nc1 z{#Nq;(<(t!403A2fkUhsRJ-}NFCSPNbjt_;_#cq8?G^yea6d4OHNdc8`(@qI{0eyf zst#ke1!aI<{MXOY|LN0ImdyucaVKr^% zP6hQSCmLLK5cS7m;Y&W1O6cp0w}2O){wXyVJ+(Bt0?0YlQssL7R{hy-Cc6K+zWcUD z0l(Y)M8_GRQ5pYlXYC(S_;g}!t_}wW$J1xePImY7#E)|W#Hys2$k7z>Z!gpf91*XT zYzE92f1WW&?ha+-l&FUKezV!Z%9EC^1mwv1Lrk1(SN`M3y}(&tzMSmr>@4tyM=99X zHjKKE+;CaYOamD2U#=*uTGJcVA@q3crrB>)tP5lld~h`)KZFXYO6e>Rw&48=dLi)| zX(T3Q*DDn*ZmIFFR^i_--`^JmBF1PWPa*ODS#ARo%8#V-Jmgmue4iuBp_bUqO=L1| zX_L&kt$TzPM*wkrHa?7OkqaD$neE!BEKWZ7qF5#!m*$wq53qTuz0<~lk~xTGX+8+I zYVB~UKYC7l!JqnhXnpCk$$2ukt*HENg$|!5Po28|+^|UdeF!h_f0f^`1n2jW1@}{R z1SttXmQlDszy=>75}^Z5BNNRTRy=|LGG-T{zMzi|Ts#fN%|T?Z?bS&N_KkW%PlT(Oy z2mu3CQE(eeNl4>$B)zDn+Z)oXYN5kWWZs#Cn_usngxO>k3EFsv@W_1uEUUIVa$X1# zZ4dtKIs2Oez=hE|ySl0=DcwGT!JLG&F!3^x?f5lFw0H<~8G>2y3E%51&46;wZ8c3J zUKqTo58d9-5T*u*B_rV7Km%ifwDW#WvPK%eKleQ8eGDKAs(-IXIPn^BQd-a+=TEG| z9eVIp1JY^s>9w!e&E?sr;<9aCaMs_zrTYg6foQ})L=8JR78>}2@H2iv^K 
zkWn{un&`L+?+#}AukwMFYJT7q5X05pp=C3J46mOey)E~2O*8;5!LVcGlodFGf7wm{ zMuPJ*SRG48z&@$pU>`qxEb2+p6ZA;xfx^uY+na_$#3us5pQ!swgVw!0-`rROYEqf1E*w$O$g>wE#DMRRje7!$_0R zjF-BW1lex8|FZ7F__0iE#uJZDi9Q5AwhvGOZ@xVH>63_d&MA1A=v_7T!63tK|74oC z`#>h8iH@ou?{?4!n5(|NzPY=b=%{n$Co-+K4e*YS(tjE;Pt79X^c!sn7k$N9^paVX zH9!YOu3^*=gqnh2#1U$fm3mN(@Of(|m{KqE@;*G+rSvpA`*6dlJ!`!P;lkZknf8BS9shlo_aT^Pmyh2V94VTo+R8M#r^X!^08|0v zWnp*{-QsDwPKd7uPmGN@6g+teG>XVEl|k}sGB&Q*P3(p#x&JRDH{5(-kii>c8o{<; z-TYszH?T|4dvqh)CLt$dDX^cG1-|d<}irWFTHW? z(*7$}!?1DTJ#H8yPkz=VM-G2KPf)GgI zZRx{*V3Vj(Vg<=E!5kR1_DrMLzd+3zRyZ|TdB9uJ1;jh2qs5e|nR&R%cke0L3JBIC zyfa0Ab&99dJ!~8d<{Ht_4wa6Pe+hI^$?$x#8Lg>@n9*^6U|~0v)m-85cJFyu4P>6B z3QnU9^&)+3oca@w0shNzUm%tiPr}An!SkDb;L%h0070p~1alui-KYS;#C2-Xzi`rE z*PO5pLBNoGXmyo3t@p~1RcG)ynH|r!)9czkAeiJCzt9Lcr40oV$H4OS!zR(7MvgEX z4;|^}&UECnKtQ52;o=i!R0MslcL`RBgim<8%8M9DO-AZx#-?+Te)14$(e#AD5Mv!x z&|Ncw=Luepv)Y3`kVF&6Eqj)liEA(L3A(1X`~;^@Twqkg=rY=IJSTcvKupe68hTZv zWMtGc8`%PPoB?h5?*H^uPntiZe>^f$PW>FTu)=m8F%;w?j#Qam@^a*tu2iPJ% z(}ckW8wl=rhgt;Mh@6di@{}JPfjQlm{@TT?L$}}~q>q|fJ`` z=G{`9w2lzY)e*s_&{1xKojdEJvJKrvU7AiLCX#YVYMct)$(^v4pRx=0>D@?oJOyJE zS^W4d#R3`SFg#5Az0 zZ(deUI3cVs01+jvW~@o$LZcCK!8JO^ti@jtEkK9;e` z$uK`Z>MPdPj}{kQE!~g*azFFP-+x4;`L&tk_w9nX7ZTY3)&!jmBd+Z^Ha8bmT6p8OlSFdE8NLo*bixgFfv*b3j4tcA#wmt)Qyr4 z{yg~_iMomIbM=z7Krr)d#Ti;eRP%UDKFnK(Mp7b$X0IT7LWCE(ZsgVwL$fbejZDd| zEe{PtmVQmo$6z8N8ahIAcllr$L1@?n)Tf{$j|2b5#ahaW0X;N~n<7Mwgn@YB0l2)i zUp|D|+!+2GDZCBe00tP4H-?{c18JB8wcch^6;Kjn4vdKUtMeZqlrCbj{F*&yu>#5n znjzo&x#!Apqs+q3_ex2Vcz|cDUF4*Z7?LLJnO~H(rR@RErQ|s+xBgU9a!S zs?{8^+qju23=C7*>yjVS|8ijSK(}VZW6m8zZH-et*tU$Xi5PTM^APYX!)T`HkqF$E z%s{FsklZ=2K(&I&l<1PEf!(Yl_Q078?_7nWo$~?DavNsEfREb(h&xCJ15V3K^ym*h z{(Y0$%BY)vB6OEvje}?-S|Z>^8Ej|wWyQYQ12d0Qxt&j*xi&0HM7JS|1HqrbV;+9Q zZ2K$-USbSY|j_~eKS=X0bsF;m`>8l2D>;c;HAXhPKvY7S33c#wJxOw;a>BN|7)8r-^zC>8; zQYQ2O67|6LBZnrnvKZz$(ZNy(!O-J(_{B)vLcB0TuyqKCNLE*G@I`e$@TQB8Hk z@tPU{JK*WhxS_Rcq?5ET032hl8uwGNj2Xa}ZTQv5G72$NC;(1{ms2g7ZucsT4t zD6D^sN@zKsrX@VxMPkqy^(bGlgUA2&E}J=!qO5uGppW2tfMk=r3~>-!QB8~qyp|LW zvICKW8|X^N%X#d#DQ&>+nHq}Dkp_>$VIkBj!15ue zwH2XttAM6@y=F5j74ZywbM1W`4|=eEDE&S;6!gelg@HOJ!&f9fUvEjF)h(fYT)W%n zPdfkt8eEbFs8z;d^tZqkE9qSlJtrUo$(lV9FxB>0h-*W_1dVQbz=vn(oVURJjlq9P z%0}O%w-srG00IyZSVBO@Mfe;oqOc8TJ?wfi<78q?>%-(e8a@bymC}%RMh^yWtoqms z9Xx1tBEBwO_8h=+LQqC;_JD|NBNKJB=8Nneoj3l}v@hv<2?=zmp9ASTNy3u9N3?{)}3*2{va6BPxwGR0p=n7PiS|qP`7X z=w68QyIAhGf=D`YCjNm2Go9du!`@!W*P=|-4A(TK{b7m;`Y|KgU(VGSL8=Osd|)@K zz)tMA#w~}z5t275<-$tYz`tY$?L{^G0}c4aIas8OeDV>SO04RDS~^7S#e<1Fa+Pr%C_Xl^At`0+mXafIDYI)v7;RyEfEY@Pt~X3#6ugxZ0TDR1bFP3M>A zwyEo(4DuSrecZ~B(Z$Zo`l}v`ll!39umMH^IVnO=xT@N}hL}4(6dS)S=JJz329NVzb`QW> zBYIW#r8DYjC(^?}M17>t`XiV;Nz&o_w_!2U*Hk;@+?}mgPgeB;0^{Ow(+c88VW5W( z;NI*)#j7sp)>QW!kgQK$6{)}Q@4bj8(CbIGxW|!@$@Nv>0VM3y*C#F@VZpl1 zqhj($r>T7a&Mom%iWB*9e1nAmc?@klJ_n~0{@3~%XE0gzUifQIQcH5@nUI@~itf|! 
z;B1N~(okMv45Get+aC11H}3%}7Q)iwV`C@IjgB$?9PfX40erb{=+q4Q*n9N*4PKtG zsEgVK5!+B8=G}I%nw9Djc-=v)kvjZ7*iu;7uPctO=qRqk+V*jC8bFQ+j3Nt$U0NO| z7YF~95>ngc<8vE6%)~^ej@~R3g;9?xLcM{Xe^TR?xC`=fbG#`3gYB?2FQKRdo!#DZ zFzwzjf}wN`Dl#Gc4C0OaQNaLhr(Wh7-$dzxm=ck1G=NtYmLu@@LE6SfULYcpC1&T> zJsu3nWpYQ4z{07)O^j&%1m*yr4>ktI-g2~fo)HzDVSgkKIzCI{QsR?N_5GR>xPj&349 z6?rrP!pR15ryA~Tpu`{;;$f*s^h!IZ{8q1@PM28y^u*vS{%?j1FaIN!wQp)qpCfEV z+X8EnCp<+L)_5=DW^^3da1COhZjrv@|khpEt-sVP;ZC#~c95i#cno!2=dGMqukZFfrsnA}+nZvmG@t{*nLp z^G{ECE*wuZ2uapRU_F7LeQMN??9lrY_E$3FffD&`%v8X^uZQOAb#jx-qFwkNSJ4@M9iNwjqMrhXahcbszX7Hb?__-N;v@n;G zJN9%3Y@pl(XN*4Q1si(Am=MW#@xl1+pPMWt0tV*)+XJsA%0S&TLTtc2A0>(o9hvkr zXvbpl?JvP9sDYb%fGzMdx+l+Ae@Bg;rkaqA2IOp>xCP6xMdhvp*ddBA+gtl5$y475 zzsT$`D~74SW^4X|0lZ+4WeouFhOskkfV{}x`J>2r2bnDPD&-#rIvOO@ zMAjTh13+pOz=!nre@`|c_rhJ{7X?58#^wOFciZ@^lcarhWF|@n{%MB;8 z{ZpcBr#YqG=FPr>V24P-tQ|DM$mT%8Z^44`YzJ!6T zPJi|RI!42&A0bJwPn(i5&>^<{fnR6Gq>UKT+<|M#sIIICQ&fjLx{;c}A>+zRG#y|A zu5%Ocz$iEEBp|(~PAACA&A0>l5DNY5kJzjyhsX=R@nft7-RHIZhPMWC(B~FsUc6!Y z25FrZQ|ptK3L-#Ia=BgJGimC4nY&B-8l;xif`OY0b^_8wy&PCmMy5LCr=UPUe>L*J z1;K>1`84FOpv3i{PNwiV>0rf=ocWmn1)Bu<@6bctc-DR8%?-O%2=AckU$zQ*r|$=R zv7vh){oFw}AQwce5G13#;pkjlDG5kSPCTw)ta749dWbGgUy3hT_nh%aC3LWZ@6xW{?;(D@V)4xC&R*x#6#!jsmy}i! zQ@tvu0MQABvWQLB-9lWBIvYA7gjnkHW$?K;mr|z!HVvst4}0pRi3wE`)uJBA%BAmr zVq}uABEx8ReZaF|44&K4QC7Zoy9Y=z_*eSt`sP=^Y&&Q8u;c=h^cNO4i0>@M!K%LPWsTtBs0e62HXa@t`ilOD>EFNl$nS;65H|;(3hI+ zjaZi|?(6$Ycz(&^`>z1{I+e(=<1}U420ynGi7N-0DcijDS@3JM$9P%c=WXnQ$@Zhi z5)nz9c}I%u*q{#WKYsiG&pbTxm;Z}O|G)iU`K?V5v4X1OfBc}-Gv0_^*MI$i{kzfr zk1vrQM5F7*__sHq(&pEXT_6{fj%ktOJpcM)kWQFiAN}*icR+SC(kdVRccX=W`|Z{L zV-d*X`2Umr+fx6vvp;kYAj9i>tw{|+Q1Zb{%uN*H=HU<=KL=s{gGd>N_Ya^g4M!T6 ze*>nd@2k*;P>ag|lP0cHkGWF$XCO>GKlOlP}(U3E&TY@DZ9T zBOEBl^u7S$91>?&L6R5%X?>2=VmZE|*jx?i9Zxed)RBU%_3sJm`@C0L5=s}#W?$1M zP33q4wm!8m4ASds!&kVeZdFJ*lunvPICxmwO6@x1t#8yGD5$nA=o^W)F7sUMs+wp4 znY$nQ@;=~lAA7PkzV>5O!DH$sbmJ}cY;W{xi-!y<$L5nlAE1r&U`GkSuEWumk3SGD zGj|sQV&_=1FYQJNFVuxW8BRC|P}w~bW0e=>vJQEp%J}EG#`b}|+D?2vUGH3{*+So4#>4SkLNPwXiG!C&}YDs37xYLQ?Jb zN%lJ2M#xw6VL^XfYr@V4N%mKFC#)g>yCc*9*FyPWq(`;K8syO`g3v7}y{xeQ{bSi7 zi5YA)Hw3Qo;~ZYoBKg=!7;pxe5vnZG5YRpp#Q~Sq+T0~(s8IO=o43Gh^9pHP{odPn z3@1Zewq7Oy(O{P5!&|GZ%dol!ZUn~v{nBYBfc>x_Jv|SdvA_-Ud6k}3PTUlA=IGbs zxa6)9NRghYKhdV2&S5~@-wKp@mh565j~polB{dA1<#hkyzU-g6gmhWPM#oawdfD=g zc3FGq8Toxs^fOm8ezhGk89^P+;+U%;Z@+A3dXepo*~-mj(NQw#b3i)sEi2hY1K!u< zpl$h)I^>FGZ+Ew_$5dXVNqO()f#vs-2+-h#iN8zesAH7J3|_Uenyne1>^*q%6TNng ziRY1v@<4q?a*AZbTc;q1D1;e)tfcV#<@o%R7B0dU<%6qG#|`8U1@4q+(Q$>D8PfOF zf%R$b!{R5!1*_|B!cAEWIRF|{bAp$-Vx=w1y-NK?)b;(>p@xCv6y12l31Iv0%4=XC zSkzTblZGr*Qm@zDJS}(mn1T1YGP_aOVcxc-2{{Nf?qoAwsZviQ@~*a~$q!FSeo8Mm zv+4|6w`!jE(Ly5Kq`Y~vyYW=W<1-a&z^<`$rVYwm4vXI;*--aPv#*S9v`u@g)MI2V z3x#22jusIwOzNoKdU4KkLEh9{$oOrrlv?fItdj=T&AD5IK*hve@2$I9FZ-V9@A4CG zPhrF#jQV__1}G_gDfK+0cD${J4n@<86%lNBgn0?b93T~bKtXl35)wXsh*{Wn)In_- zY1CwWrQ^0TsmdZqvm%f>%9S3{et}Hn{eI=a$d<-z%@QD2{h*e&DNjlpDn1y9s_sgB)_uFJg!EBf#=Ac1} zUrLGy26kR~RYE`N25)5nLK7JsT}xl${@Mi6q@{kY0@A&=w0oF};JV$?$;qkTZ8G@c z>~$dfu&@B`k4>H&d@(M%w%Mgw7W;5@Onm888#Zvao; zb?z_M@-XM~aszL((-&J`aE_b0fe`iYbkfNR%wGllPJea?yy(9v$p@y4dUK%yKeI#> zaY(|X^k^JgZ}`&rj&?&N$T8U^J`qLx=?k35@&aRge_na#B9z+Sut_0)J$10`6vAe?_lRfAwQ&3z3n4&|2N?*CceF)kq|a9uhZ}WA_lxa+zqq+s}~RsFG-K z!WVq>zpM)uKtwQsTg*}8hp5K410Um}UA8kxMyIv+xEf?yee#W^OL~&1Z?EIpX}WnJ z4Y=ee2C5(Ozq`q0Tvt^`D(Q(0!QU?`3iA#gysbbQKhvCLqF^7{V20tXk3>9(eug%v zCfju1^}Ey#{9{lpIbccGBE*Wz3s6NYk2FUmjkdYJxHhEH-*(aYfkd1T-^Wfu)M+Vmm( zD*+AEXFa%=j)@zR*uMU{H{?m4@iAxqY3uP_;4AdAo?eMv`2mzXvmIlIuFDmUG*D`z z%JV8~XlMxRSM2}s@y&&jmO4V6Oo(Ta%@gaLq+G}0nm}4((kM_cJUy_Tm~N!hR@XgW 
zJHZ$|`MtjCD~Y7V6J<^-RxJ8_QS!Pth57sacLz;!8)aVRYjk3C^KQ;kn}7n(ORGM* z(5kbRhj0;ldpj`ddFT*Mh6=iwpM$!_jAzzHb&uQ4Hy|W?zFOCbRjU#Mt zi&gA;G4-wX;75XyuqGzgh~&<(e{FEH!fZkU)cCnco)P06UkaIlD`#BI&OnLTfjLEE z2JS>>87>PW-Srz!8r#m& zaN@gTwa25loEsG%hssq!ONSwo+PYVHCE|*zoOog-6#e#z%+0=GUR{9tAiIAVy8JhL zua!$Epo7J3s`I9QTNf2pD_U~;boI!So2xwoiwZGWRfum`18vWa+Y6_}dYv{_u+Chl z3)GrP{;~4awQoLoq;g8K;(Jik&WwIh(%z!SGnaW+qplC_>AOOy`Mo)^{EKJY02NNm zvVp6jxVrGw|!)Ewy$#cs^Z8`rtW64Wv4==#wAkz5cv4d+*g)b`39R5s>CLNz zC@LC+kPKtf2H9tfu^n11dm@an8}nYbY`@mu@5gt1>)#y59QX6gGtYfr_jR4;d7XEg zzEXcO-=hn9brzcmN;rz@Z=n4=s_3d`vu8ZiZYKVyk$WW*c!jKb#~BB)BxjsJ*x(<@ zGr=+_-6;AvM|dox8-VE?yB2JEZw~L|U)M8mm~!UG@YDUdGR%EPv^_^NA2)= zu|DybJuksI&jS*YiDFiE-kBwuo*fTrb zxjf&F)ATWCm#=LOCd}KJ%|B`{5&^bH)@?<3N6;1i(-TSF&jP@`G{1JdeRi;&Ja8j7 zH$ns(C2nv-pAo=eQ)GC_DOPWInl;%!7FLHyfYBl=5q!HeT)FSffl!Mj=Q5Ocd0<}% z@!J7(J2=Tj)mxtNEDjb*iiNquRMeVl#QwHud<=}PMklWc>ubQ2JKBK?AqbL)Vd+k- z5PRtllHvSlLmRIThB!<#zjd6)ieG`<2v}UW%7Lso4wEZLj9+)5i9gK;n)u&;ye4IP z-cNoJr<@zT0}2bWu6EYfoGi#q$Ooq7UZ-yx?ENrVF~6=H`vrmM?v?XM!qSs|{Uj{| z>HKz4WAaxa&o@u}&*&AK<-$Js&%d@JQC(PE{|9FafYJzlm~sL)1)8o>Hwq>jQ47Sx zmdD>}pIBpbvM4xpLC`RRlQus7{fD2>F z!sg(<)ihvkbC{clB-!CgwVEkgZ~qF|YF+suC? z9CuF(0+l23dY$$`60pW0?|1)NV;TBBU5a~E9r_bW4m~kj`V%tN&4BUs9Wa2oNXJo# z*&w&o9Cs->AB0ZO@Y>gR$Vfs4wZ8yzS^-tUkEKs`Z39J))x&y4ntje1%V06xebAkU zxI)6I-Z|C}YsiDhdIgdZ$a#A7gCx9L4?*W(IGxKz+zUAB7Z*qI=tw-&eh}{x+~<(T zMvWKXyo;SfeDVO^QE37os}t?yIUFd5M#ou$E%d_e72wRPAgDj_uoddjkt%n8s*XRv zEKb=cAkyCD71XV@E#XXo7SG4K;&V@rkX|=(5Jvg@sJ?A`Q&aEN=yBgVw2h*wT9-33 z>M=c-bUDn%k){=yCcdkt*EFcZNVch~QN zH0u<2;L2MXO8Si*nuOo+DLbG>kF`nZmrE1pGS1SO)^woqUls!4Khm5)K&NehNsUc@ z236Dp%Iv#vjKuZVY9B0xr&|TWch)Cv4AAgwsAg+b;R)?K!_1> z^_L%q#f)oRI>xAGHB*v!o|;NzGJ9@4c&~GQUpqK6QYz`|YMBeHM2*h)WKDp+3uo=N zm=1bw$K`b|Zf%~5{ZJSXP?)bj@kxpksj6nRu7Rp~|Dk&FOql;zLjH^H7*A710TAVm zwB)h(z!?n0>EtLCdq&x-XTp^y9E0bKr-BVBrQks5jDLM)?ROPsvASBL;1cZHc~=o< zk!~_PGxaJrS8FucjxB$rsgtH}n2&MmC3UR;3Y09|1~6beCcF+8hpcUS z1}yRrZqLN0M-5xR5{N4kchCPQ5%o(zL4d$$sE8J%p=^g^9lTN#fqGXjfqnCTpbu=S zlhE*_TGkI4X6?M%d}_DRBwS=L?o)`k+ilM55T}|PqL(xnFxh#J;vj2s7tC<#BuNu4 z&`D(i`ePFzMPL;ad}6@}r5rSIY|UO*hKQ|C@w9_ND>&|-ylgSl3u4gKY59za0~q0- zDxmcDSN)jEI?joLGa`?lf43QRm)NuuTX+!SiSfl)8PQ4XwjqC~P}K77EcY<8pJ~?k z2abfa=wM7PY)t&wWO^yE2&MCMeM({s7z&@mdo?7qd4(iZUsvbgShKnyv>i7usoupd zGt&Qah`KthaF%SxxHP8QAF2NVmqgST&hL`Q)Tz9~J^Q;U2-MYd2l204Dgn~$wcF$5 z+2y9~OM8jySGFOmJKW_hJKYq=nduhIJZ0b(aWghrs5$S}6j*vf#dsm|$?Bb53A47U zXG13^3Ij}>Zuj9E?o~y0&|3o2*9iW_|J_6U z)5ig=m0V2mk!NOp+o55y#6H^(&wj-ppooQVvIho{bueK2zJ+JCjRimyrRN*^9S zM(oV4Q7)J`>Gq`(T%#S@p`S>J@dZRvlhf+hs_3jCbMzh%q^`qk{rq*@_D`ulqFwxwL&5ZB{s(W3w0Y*b#a z$uZv#_~Z&qPxMb<5kKJG+1 zXxk!xDY>x-(P!CGg3o~@Z&N4|BX!yFE;^5K1^u7X{{dcF_*sr@@^e+O)&dBT<=zFO5~|$sksWG2a{XVn zT>I(IR8bPWK6_Q@S#oSY97H|0sfI>2rJi(qE+bnT0mm;+PA9XGwGnFG9LmDq;Bgmd z?9lKMuu>bXZy^BUT9E^%$aI`=*Q4@&zpJPv$G(_H^&@G$=+3?1%y+!DRPnI1?77W0 z*0k15CY3w+Y}*h&(%jsfUkdwM+R=S+ky7{evI#R@R8=ilG?daAP^V;XP$^8`=*Q`H zoANstmVsv9H~TusV|oPs4aHT>MGEzyDQ`Knb`rNdn5`{@PM@Jj#&B=pY=2>XDLA6% z{p4Ax%J>8gu|AmeIu)~E0|7AtRoukJfiYiU1@0R1{Q!{RVs+(b}BI_X|k*! 
zGJ78-`d0}+=;ATCYMDDan_beFq%&N}gUvu3yuB1-r|H%zhYC8nK1C~>5x74G3Fp+` z){CEL1KuGX6F>$khT;G&GRS@`KfO`s#}Q=#IVK;DNR?}I%kBI@`Fca`QK|l?n6SG3 z@qs3)FQc)sr|r^#ic74#_6Xo}4YHJ+NsrPPCgqAw(#e*oBRnpY^Yw5NUMG#+OfTrV zC@7|GJXT^!e4|V$h^HKPfN>ZyFkZPyg_P12Ifg!bXYHw*^C-~bhz1) zBCTsIz9tVuAO}-R4<*(tWF=Vs?`$u2ZWdKNc?F~P!iwdk`8Xu%f-j&C?^<==>nlw$ zMG7abBKCati!vaJ9#tCji5gF-NCM6g@{5yhS$}13hx6h_5k^yK)cj6SN4jh4VD7cd zSeK&dH&U}IE_PaGl~pNkTXjhl@?8#-C*81}5!CTsLg-oi8D3QkcQ`trRbd297bt@s z=bM{Z`1~kGkyDUmjF+Dln$8IGOF4Z}?;Pw77556pp4@TV1#5;TvZmTp#yGV=*=a8- zm%WB=@*bC1qVK?-$F=39QUb7fpCB@4aZqnrpKfvU@~@QT4pv| zd=^&o%=$}X>=^m`RK_NSSNFr(ru(a!#{vDdonDu^)dk=GgSs{xuAm0uSblKAIn?kfuz~e+&q=$8XWEP zrGWhSS1i4^UPHUxcSz4&uLkOD%Bj`Nt=-zuUI1S(8x#lI^1R=V5NlfpZNX`IR#e44 zFz1JUNb$_m8SR`|Z}+YN&~bF9wvoXd>c;GL%Yu&mKQ*55FW6w?eE~x3$=5a?2xh*_ zzTaLrl5JCWIHZF|BJm~kS-n?CbBSjl3{3kO5dV&h^K;q7*HRqxab$I|tgpsubFUys zds8DU3ktysWiMKt|2f=e9=KzUivV5E9aQjxS=F4`%sfuHY3*^q2TtDAE$jaPouicK zo1GH&3x49#9RQD$c!ROR^LZK?lw`!Mqo$EFgjd4d*CsfBc8i_AJ{ONEr12o+SLaza zAEtznOS&VHiAd11ae})#xWGNesKGyW5$j$iI9PU{MSLIziIe=VpO*gHLT}BhpFx;a zZd|*9I9ePmbPm*(w8<^;K?^HyC9u$O6P{3EtAWnn-5I1#7eVUOeeF8mN+e_9T7gSy z>!n9t_Ng1>1P`Qs7MoLKEtIAUgSnO~E&lHI|2+6VZ+yjU{r@@Di#RzBrRrQ8Bz vj}d5SkuBD|&F&P$|7Dk&zvlnK&7_ZTJzKx6I8_QBjBCH4>E7qN?Jxfq;c;}s literal 0 HcmV?d00001 diff --git a/docs/source/dev/kernel/paged_attention.rst b/docs/source/dev/kernel/paged_attention.rst new file mode 100644 index 0000000000000..6fcadeeec27b6 --- /dev/null +++ b/docs/source/dev/kernel/paged_attention.rst @@ -0,0 +1,525 @@ +vLLM Paged Attention +==================== + +- Currently, vLLM utilizes its own implementation of a multi-head query + attention kernel (``csrc/attention/attention_kernels.cu``). + This kernel is designed to be compatible with + vLLM's paged KV caches, where the key and value cache are stored in + separate blocks (note that this block concept differs from the GPU + thread block. So in a later document, I will refer to vLLM paged + attention block as "block", while refer to GPU thread block as + "thread block"). +- To achieve high performance, this kernel relies on a specially + designed memory layout and access method, specifically when threads + read data from global memory to shared memory. The purpose of this + document is to provide a high-level explanation of the kernel + implementation step by step, aiding those who wish to learn about the + vLLM multi-head query attention kernel. After going through this + document, users will likely have a better understanding and feel easier + to follow the actual implementation. +- Please note that this document may not cover all details, such as how + to calculate the correct index for the corresponding data or the dot + multiplication implementation. However, after reading this document + and becoming familiar with the high-level logic flow, it should be + easier for you to read the actual code and understand the details. + +Inputs +------ + +- The kernel function takes a list of arguments for the current thread + to perform its assigned work. The three most important arguments are + the input pointers ``q``, ``k_cache``, and ``v_cache``, which point + to query, key, and value data on global memory that need to be read + and processed. The output pointer ``out`` points to global memory + where the result should be written. These four pointers actually + refer to multi-dimensional arrays, but each thread only accesses the + portion of data assigned to it. I have omitted all other runtime + parameters here for simplicity. + + .. 
code:: cpp
+
+      template<
+        typename scalar_t,
+        int HEAD_SIZE,
+        int BLOCK_SIZE,
+        int NUM_THREADS,
+        int PARTITION_SIZE = 0>
+      __device__ void paged_attention_kernel(
+        ... // Other side args.
+        scalar_t* __restrict__ out,             // [num_seqs, num_heads, max_num_partitions, head_size]
+        const scalar_t* __restrict__ q,         // [num_seqs, num_heads, head_size]
+        const scalar_t* __restrict__ k_cache,   // [num_blocks, num_kv_heads, head_size/x, block_size, x]
+        const scalar_t* __restrict__ v_cache,   // [num_blocks, num_kv_heads, head_size, block_size]
+        ... // Other side args.
+      )
+
+- There is also a list of template arguments above the function
+  signature that are determined at compilation time. ``scalar_t``
+  represents the data type of the query, key, and value data elements,
+  such as FP16. ``HEAD_SIZE`` indicates the number of elements in each
+  head. ``BLOCK_SIZE`` refers to the number of tokens in each block.
+  ``NUM_THREADS`` denotes the number of threads in each thread block.
+  ``PARTITION_SIZE`` represents the number of tensor parallel GPUs (for
+  simplicity, we assume this is 0 and tensor parallelism is disabled).
+- With these arguments, we need to perform a sequence of preparations.
+  This includes calculating the current head index, block index, and
+  other necessary variables. However, for now, we can ignore these
+  preparations and proceed directly to the actual calculations. It will
+  be easier to understand them once we grasp the entire flow.
+
+Concepts
+--------
+
+- Just before we dive into the calculation flow, I want to describe a
+  few concepts that are needed for later sections. However, you may
+  skip this section and return to it later if you encounter any
+  confusing terminology.
+- **Sequence**: A sequence represents a client request. For example,
+  the data pointed to by ``q`` has a shape of
+  ``[num_seqs, num_heads, head_size]``. This means that ``q`` points to
+  a total of ``num_seqs`` query sequences. Since this kernel is a
+  single-query attention kernel, each sequence only has one query
+  token. Hence, ``num_seqs`` equals the total number of tokens that are
+  processed in the batch.
+- **Context**: The context consists of the generated tokens from the
+  sequence. For instance, ``["What", "is", "your"]`` are the context
+  tokens, and the input query token is ``"name"``. The model might
+  generate the token ``"?"``.
+- **Vec**: The vec is a list of elements that are fetched and
+  calculated together. For query and key data, the vec size
+  (``VEC_SIZE``) is determined so that each thread group can fetch and
+  calculate 16 bytes of data at a time. For value data, the vec size
+  (``V_VEC_SIZE``) is determined so that each thread can fetch and
+  calculate 16 bytes of data at a time. For example, if ``scalar_t``
+  is FP16 (2 bytes) and ``THREAD_GROUP_SIZE`` is 2, then ``VEC_SIZE``
+  will be 4, while ``V_VEC_SIZE`` will be 8.
+- **Thread group**: The thread group is a small group of threads
+  (``THREAD_GROUP_SIZE``) that fetches and calculates one query token
+  and one key token at a time. Each thread handles only a portion of
+  the token data. The total number of elements processed by one thread
+  group is referred to as ``x``. For example, if the thread group
+  contains 2 threads and the head size is 8, then thread 0 handles the
+  query and key elements at indices 0, 2, 4, 6, while thread 1 handles
+  the elements at indices 1, 3, 5, 7.
+- **Block**: The key and value cache data in vLLM are split into
+  blocks. Each block stores data for a fixed number (``BLOCK_SIZE``) of
+  tokens at one head. Each block may contain only a portion of the
+  whole context tokens. For example, if the block size is 16 and the
+  head size is 128, then for one head, one block can store 16 \* 128 =
+  2048 elements.
+- **Warp**: A warp is a group of 32 threads (``WARP_SIZE``) that
+  execute simultaneously on a streaming multiprocessor (SM). In this
+  kernel, each warp processes the calculation between one query token
+  and the key tokens of one entire block at a time (it may process
+  multiple blocks in multiple iterations). For example, if there are 4
+  warps and 6 blocks for one context, the assignment would be: warp 0
+  handles the 0th and 4th blocks, warp 1 handles the 1st and 5th
+  blocks, warp 2 handles the 2nd block, and warp 3 handles the 3rd
+  block.
+- **Thread block**: A thread block is a group of threads
+  (``NUM_THREADS``) that can access the same shared memory. Each thread
+  block contains multiple warps (``NUM_WARPS``), and in this kernel,
+  each thread block processes the calculation between one query token
+  and the key tokens of a whole context.
+- **Grid**: A grid is a collection of thread blocks and defines the
+  shape of the collection. In this kernel, the shape is
+  ``(num_heads, num_seqs, max_num_partitions)``. Therefore, each thread
+  block only handles the calculation for one head, one sequence, and
+  one partition.
+
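+To make these definitions a bit more tangible, here is a small,
+standalone sketch (not taken from the kernel itself) that derives the
+sizing constants for the example configuration used throughout this
+document (FP16 data, ``HEAD_SIZE = 128``, ``BLOCK_SIZE = 16``,
+``NUM_THREADS = 128``). The exact expressions in the kernel source may
+differ slightly, but the relationships are the ones described above.
+
+.. code:: cpp
+
+   // Standalone illustration only; compile and run on the host.
+   #include <cstdio>
+
+   int main() {
+     const int scalar_bytes = 2;    // sizeof(scalar_t) for FP16
+     const int HEAD_SIZE = 128;
+     const int BLOCK_SIZE = 16;
+     const int NUM_THREADS = 128;
+     const int WARP_SIZE = 32;      // hardware warp width on NVIDIA GPUs
+
+     // One thread group cooperatively fetches 16 bytes of query/key data.
+     const int THREAD_GROUP_SIZE =
+         (WARP_SIZE / BLOCK_SIZE > 1) ? WARP_SIZE / BLOCK_SIZE : 1;   // -> 2
+     const int VEC_SIZE = 16 / (THREAD_GROUP_SIZE * scalar_bytes);    // -> 4
+     const int NUM_VECS_PER_THREAD =
+         HEAD_SIZE / (THREAD_GROUP_SIZE * VEC_SIZE);                  // -> 16
+     // Each thread fetches 16 bytes of value data on its own.
+     const int V_VEC_SIZE = 16 / scalar_bytes;                        // -> 8
+     const int NUM_WARPS = NUM_THREADS / WARP_SIZE;                   // -> 4
+
+     printf("THREAD_GROUP_SIZE=%d VEC_SIZE=%d NUM_VECS_PER_THREAD=%d\n",
+            THREAD_GROUP_SIZE, VEC_SIZE, NUM_VECS_PER_THREAD);
+     printf("V_VEC_SIZE=%d NUM_WARPS=%d\n", V_VEC_SIZE, NUM_WARPS);
+     return 0;
+   }
+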
+Query
+-----
+
+- This section will introduce how query data is stored in memory and
+  fetched by each thread. As mentioned above, each thread group fetches
+  the data of one query token, while each thread itself handles only a
+  part of that query token's data. Within each warp, every thread group
+  will fetch the same query token data, but will multiply it with
+  different key token data.
+
+  .. code:: cpp
+
+     const scalar_t* q_ptr = q + seq_idx * q_stride + head_idx * HEAD_SIZE;
+
+  .. figure:: ../../assets/kernel/query.png
+     :alt: query
+     :width: 70%
+     :align: center
+
+     Query data of one token at one head
+
+- Each thread defines its own ``q_ptr``, which points to the assigned
+  query token data on global memory. For example, if ``VEC_SIZE`` is 4
+  and ``HEAD_SIZE`` is 128, ``q_ptr`` points to data that contains a
+  total of 128 elements divided into 128 / 4 = 32 vecs.
+
+  .. figure:: ../../assets/kernel/q_vecs.png
+     :alt: q_vecs
+     :width: 70%
+     :align: center
+
+     ``q_vecs`` for one thread group
+
+  .. code:: cpp
+
+     __shared__ Q_vec q_vecs[THREAD_GROUP_SIZE][NUM_VECS_PER_THREAD];
+
+- Next, we need to read the global memory data pointed to by ``q_ptr``
+  into shared memory as ``q_vecs``. It is important to note that each
+  vec is assigned to a different row. For example, if
+  ``THREAD_GROUP_SIZE`` is 2, thread 0 will handle the 0th row of vecs,
+  while thread 1 handles the 1st row. By reading the query data in this
+  way, neighboring threads like thread 0 and thread 1 read neighboring
+  memory, achieving memory coalescing and improving performance.
+
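+As a rough sketch of what this loading loop can look like (simplified
+from the kernel; the index variables ``thread_group_idx``,
+``thread_group_offset``, and ``NUM_THREAD_GROUPS`` are assumptions
+based on the description above), each thread walks over the vecs it
+owns and copies them from ``q_ptr`` into its row of ``q_vecs``:
+
+.. code:: cpp
+
+   // Each thread group loads the same query token; within a group,
+   // thread_group_offset selects which vecs of the row a thread owns.
+   #pragma unroll
+   for (int i = thread_group_idx; i < NUM_VECS_PER_THREAD;
+        i += NUM_THREAD_GROUPS) {
+     const int vec_idx = thread_group_offset + i * THREAD_GROUP_SIZE;
+     q_vecs[thread_group_offset][i] =
+         *reinterpret_cast<const Q_vec*>(q_ptr + vec_idx * VEC_SIZE);
+   }
+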
+Key
+---
+
+- Similar to the "Query" section, this section introduces the memory
+  layout and assignment for keys. While each thread group only handles
+  one query token per kernel run, it may handle multiple key tokens
+  across multiple iterations. Meanwhile, each warp will process
+  multiple blocks of key tokens in multiple iterations, ensuring that
+  all context tokens are processed by the entire thread group after the
+  kernel run. In this context, "handle" refers to performing the dot
+  multiplication between query data and key data.
+
+  .. code:: cpp
+
+     const scalar_t* k_ptr = k_cache + physical_block_number * kv_block_stride
+                                     + kv_head_idx * kv_head_stride
+                                     + physical_block_offset * x;
+
+- Unlike ``q_ptr``, ``k_ptr`` in each thread will point to a different
+  key token at different iterations. As shown above, ``k_ptr`` points
+  to the key token data in ``k_cache`` at the assigned block, assigned
+  head, and assigned token.
+
+  .. figure:: ../../assets/kernel/key.png
+     :alt: key
+     :width: 70%
+     :align: center
+
+     Key data of all context tokens at one head
+
+- The diagram above illustrates the memory layout for key data. It
+  assumes that ``BLOCK_SIZE`` is 16, ``HEAD_SIZE`` is 128, ``x`` is 8,
+  ``THREAD_GROUP_SIZE`` is 2, and there are a total of 4 warps. Each
+  rectangle represents all the elements for one key token at one head,
+  which will be processed by one thread group. The left half shows the
+  total 16 blocks of key token data for warp 0, while the right half
+  represents the remaining key token data for other warps or
+  iterations. Inside each rectangle, there are a total of 32 vecs (128
+  elements for one token) that will be processed by 2 threads (one
+  thread group) separately.
+
+  .. figure:: ../../assets/kernel/k_vecs.png
+     :alt: k_vecs
+     :width: 70%
+     :align: center
+
+     ``k_vecs`` for one thread
+
+  .. code:: cpp
+
+     K_vec k_vecs[NUM_VECS_PER_THREAD];
+
+- Next, we need to read the key token data from ``k_ptr`` and store it
+  in register memory as ``k_vecs``. We use register memory for
+  ``k_vecs`` because it will only be accessed by one thread once,
+  whereas ``q_vecs`` will be accessed by multiple threads multiple
+  times. Each ``k_vecs`` will contain multiple vectors for later
+  calculation. Each vec will be set at each inner iteration. The
+  assignment of vecs allows neighboring threads in a warp to read
+  neighboring memory together, which again promotes memory coalescing.
+  For instance, thread 0 will read vec 0, while thread 1 will read vec
+  1. In the next inner loop, thread 0 will read vec 2, while thread 1
+  will read vec 3, and so on.
+- You may still be a little confused about the overall flow. Don't
+  worry, please keep reading the next "QK" section. It will illustrate
+  the query and key calculation flow in a clearer and higher-level
+  manner.
+
+QK
+---
+
+- As shown in the pseudo code below, before the entire for-loop block,
+  we fetch the query data for one token and store it in ``q_vecs``.
+  Then, in the outer for loop, we iterate through different ``k_ptr``\ s
+  that point to different tokens and prepare the ``k_vecs`` in the
+  inner for loop. Finally, we perform the dot multiplication between
+  ``q_vecs`` and each ``k_vecs``.
+
+  .. code:: cpp
+
+     q_vecs = ...
+     for ... {
+        k_ptr = ...
+        for ... {
+            k_vecs[i] = ...
+        }
+        ...
+        float qk = scale * Qk_dot::dot(q_vecs[thread_group_offset], k_vecs);
+     }
+
+- As mentioned before, each thread only fetches part of the query and
+  key token data at a time. However, a cross-thread-group reduction
+  happens inside ``Qk_dot<>::dot``, so the ``qk`` returned here is not
+  just the partial dot product computed by one thread, but the full
+  result over the entire query and key token data.
+- For example, if the value of ``HEAD_SIZE`` is 128 and
+  ``THREAD_GROUP_SIZE`` is 2, each thread's ``k_vecs`` will contain a
+  total of 64 elements. However, the returned ``qk`` is actually the
+  result of the dot multiplication between 128 query elements and 128
+  key elements. If you want to learn more about the details of the dot
+  multiplication and reduction, you may refer to the implementation of
+  ``Qk_dot<>::dot``. However, for the sake of simplicity, I will not
+  cover it in this document.
+
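+The following is a rough sketch of the idea behind that cross-thread-group
+reduction (it is not the actual ``Qk_dot<>::dot`` implementation): each
+thread first accumulates a partial dot product over its own vecs, and the
+partial sums are then combined across the ``THREAD_GROUP_SIZE`` threads
+with warp shuffle operations, so that every thread in the group ends up
+holding the full query-key dot product.
+
+.. code:: cpp
+
+   // Per-thread partial dot product over the vecs this thread owns.
+   float qk = 0.f;
+   #pragma unroll
+   for (int i = 0; i < NUM_VECS_PER_THREAD; i++) {
+     qk += dot(q_vecs[thread_group_offset][i], k_vecs[i]);
+   }
+   // Combine the partial sums within the thread group.
+   #pragma unroll
+   for (int mask = THREAD_GROUP_SIZE / 2; mask >= 1; mask /= 2) {
+     qk += VLLM_SHFL_XOR_SYNC(qk, mask);
+   }
+   // qk now holds the full dot product for this query/key token pair.
+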
If you want to learn more about the details of the dot + multiplication and reduction, you may refer to the implementation of + ``Qk_dot<>::dot``. However, for the sake of simplicity, I will not + cover it in this document. + +Softmax +------- + +- Next, we need to calculate the normalized softmax for all ``qk``\ s, + as shown above, where each :math:`x` represents a ``qk``. To do this, + we must obtain the reduced value of ``qk_max``\ (:math:`m(x)`) and + the ``exp_sum``\ (:math:`\ell(x)`) of all ``qk``\ s. The reduction + should be performed across the entire thread block, encompassing + results between the query token and all context key tokens. + + .. math:: + :nowrap: + + \begin{gather*} + m(x):=\max _i \quad x_i \\ \quad f(x):=\left[\begin{array}{lll}e^{x_1-m(x)} & \ldots & e^{x_B-m(x)}\end{array}\right]\\ \quad \ell(x):=\sum_i f(x)_i \\ + \quad \operatorname{softmax}(x):=\frac{f(x)}{\ell(x)} + \end{gather*} + +``qk_max`` and ``logits`` +~~~~~~~~~~~~~~~~~~~~~~~~~ + +- Just right after we get the ``qk`` result, we can set the temporary + ``logits`` result with ``qk`` (In the end, the ``logits`` should + store the normalized softmax result). Also we can compare and collect + the ``qk_max`` for all ``qk``\ s that are calculated by current + thread group. + + .. code:: cpp + + if (thread_group_offset == 0) { + const bool mask = token_idx >= context_len; + logits[token_idx - start_token_idx] = mask ? 0.f : qk; + qk_max = mask ? qk_max : fmaxf(qk_max, qk); + } + +- Please note that the ``logits`` here is on shared memory, so each + thread group will set the fields for its own assigned context tokens. + Overall, the size of logits should be number of context tokens. + + .. code:: cpp + + for (int mask = WARP_SIZE / 2; mask >= THREAD_GROUP_SIZE; mask /= 2) { + qk_max = fmaxf(qk_max, VLLM_SHFL_XOR_SYNC(qk_max, mask)); + } + + if (lane == 0) { + red_smem[warp_idx] = qk_max; + } + +- Then we need to get the reduced ``qk_max`` across each warp. The main + idea is to make threads in warp to communicate with each other and + get the final max ``qk`` . + + .. code:: cpp + + for (int mask = NUM_WARPS / 2; mask >= 1; mask /= 2) { + qk_max = fmaxf(qk_max, VLLM_SHFL_XOR_SYNC(qk_max, mask)); + } + qk_max = VLLM_SHFL_SYNC(qk_max, 0); + +- Finally, we can get the reduced ``qk_max`` from whole thread block by + compare the ``qk_max`` from all warps in this thread block. Then we + need to broadcast the final result to each thread. + +``exp_sum`` +~~~~~~~~~~~ + +- Similar to ``qk_max``, we need to get the reduced sum value from the + entire thread block too. + + .. code:: cpp + + for (int i = thread_idx; i < num_tokens; i += NUM_THREADS) { + float val = __expf(logits[i] - qk_max); + logits[i] = val; + exp_sum += val; + } + ... + exp_sum = block_sum(&red_smem[NUM_WARPS], exp_sum); + +- Firstly, sum all exp values from each thread group, and meanwhile, + convert each entry of ``logits`` from ``qk`` to ``exp(qk - qk_max)``. + Please note, the ``qk_max`` here is already the max ``qk`` across the + whole thread block. And then we can do reduction for ``exp_sum`` + across whole thread block just like the ``qk_max``. + + .. code:: cpp + + const float inv_sum = __fdividef(1.f, exp_sum + 1e-6f); + for (int i = thread_idx; i < num_tokens; i += NUM_THREADS) { + logits[i] *= inv_sum; + } + +- Finally, with the reduced ``qk_max`` and ``exp_sum``, we can obtain + the final normalized softmax result as ``logits``. This ``logits`` + variable will be used for dot multiplication with the value data in + later steps. 
Now, it should store the normalized softmax result of + ``qk`` for all assigned context tokens. + +Value +----- + +.. figure:: ../../assets/kernel/value.png + :alt: value + :width: 70% + :align: center + + Value data of all context tokens at one head + +.. figure:: ../../assets/kernel/logits_vec.png + :alt: logits_vec + :width: 50% + :align: center + + ``logits_vec`` for one thread + +.. figure:: ../../assets/kernel/v_vec.png + :alt: v_vec + :width: 70% + :align: center + + List of ``v_vec`` for one thread + +- Now we need to retrieve the value data and perform dot multiplication + with ``logits``. Unlike query and key, there is no thread group + concept for value data. As shown in diagram, different from key token + memory layout, elements from the same column correspond to the same + value token. For one block of value data, there are ``HEAD_SIZE`` of + rows and ``BLOCK_SIZE`` of columns that are split into multiple + ``v_vecs``. +- Each thread always fetches ``V_VEC_SIZE`` elements from the same + ``V_VEC_SIZE`` of tokens at a time. As a result, a single thread + retrieves multiple ``v_vec``\ s from different rows and the same + columns through multiple inner iterations. For each ``v_vec``, it + needs to be dot multiplied with the corresponding ``logits_vec``, + which is also ``V_VEC_SIZE`` elements from ``logits``. Overall, with + multiple inner iterations, each warp will process one block of value + tokens. And with multiple outer iterations, the whole context value + tokens are processd + + .. code:: cpp + + float accs[NUM_ROWS_PER_THREAD]; + for ... { // Iteration over different blocks. + logits_vec = ... + for ... { // Iteration over different rows. + v_vec = ... + ... + accs[i] += dot(logits_vec, v_vec); + } + } + +- As shown in the above pseudo code, in the outer loop, similar to + ``k_ptr``, ``logits_vec`` iterates over different blocks and reads + ``V_VEC_SIZE`` elements from ``logits``. In the inner loop, each + thread reads ``V_VEC_SIZE`` elements from the same tokens as a + ``v_vec`` and performs dot multiplication. It is important to note + that in each inner iteration, the thread fetches different head + position elements for the same tokens. The dot result is then + accumulated in ``accs``. Therefore, each entry of ``accs`` is mapped + to a head position assigned to the current thread. +- For example, if ``BLOCK_SIZE`` is 16 and ``V_VEC_SIZE`` is 8, each + thread fetches 8 value elements for 8 tokens at a time. Each element + is from different tokens at the same head position. If ``HEAD_SIZE`` + is 128 and ``WARP_SIZE`` is 32, for each inner loop, a warp needs to + fetch ``WARP_SIZE * V_VEC_SIZE = 256`` elements. This means there are + a total of 128 \* 16 / 256 = 8 inner iterations for a warp to handle + a whole block of value tokens. And each ``accs`` in each thread + contains 8 elements that accumulated at 8 different head positions. + For the thread 0, the ``accs`` variable will have 8 elements, which + are 0th, 16th … 112th elements of a value head that are accumulated + from all assigned 8 tokens. + +LV +--- +- Now, we need to perform reduction for ``accs`` within each warp. This + process allows each thread to accumulate the ``accs`` for the + assigned head positions of all tokens in one block. + + .. 
code:: cpp + + for (int i = 0; i < NUM_ROWS_PER_THREAD; i++) { + float acc = accs[i]; + for (int mask = NUM_V_VECS_PER_ROW / 2; mask >= 1; mask /= 2) { + acc += VLLM_SHFL_XOR_SYNC(acc, mask); + } + accs[i] = acc; + } + +- Next, we perform reduction for ``accs`` across all warps, allowing + each thread to have the accumulation of ``accs`` for the assigned + head positions of all context tokens. Please note that each ``accs`` + in every thread only stores the accumulation for a portion of + elements of the entire head for all context tokens. However, overall, + all results for output have been calculated but are just stored in + different thread register memory. + + .. code:: cpp + + float* out_smem = reinterpret_cast(shared_mem); + for (int i = NUM_WARPS; i > 1; i /= 2) { + // Upper warps write to shared memory. + ... + float* dst = &out_smem[(warp_idx - mid) * HEAD_SIZE]; + for (int i = 0; i < NUM_ROWS_PER_THREAD; i++) { + ... + dst[row_idx] = accs[i]; + } + + // Lower warps update the output. + const float* src = &out_smem[warp_idx * HEAD_SIZE]; + for (int i = 0; i < NUM_ROWS_PER_THREAD; i++) { + ... + accs[i] += src[row_idx]; + } + + // Write out the accs. + } + +Output +------ + +- Now we can write all of calculated result from local register memory + to final output global memory. + + .. code:: cpp + + scalar_t* out_ptr = out + seq_idx * num_heads * max_num_partitions * HEAD_SIZE + + head_idx * max_num_partitions * HEAD_SIZE + + partition_idx * HEAD_SIZE; + +- First, we need to define the ``out_ptr`` variable, which points to + the start address of the assigned sequence and assigned head. + + .. code:: cpp + + for (int i = 0; i < NUM_ROWS_PER_THREAD; i++) { + const int row_idx = lane / NUM_V_VECS_PER_ROW + i * NUM_ROWS_PER_ITER; + if (row_idx < HEAD_SIZE && lane % NUM_V_VECS_PER_ROW == 0) { + from_float(*(out_ptr + row_idx), accs[i]); + } + } + +- Finally, we need to iterate over different assigned head positions + and write out the corresponding accumulated result based on the + ``out_ptr``. 
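To tie the sections above together, here is a plain, single-threaded C++ sketch of what the kernel computes for one query token at one head. It is illustrative only: the function name is invented, partitioning and all thread/warp parallelism are omitted, and the key/value blocks are laid out token-major for readability rather than in the vectorized ``x``-packed and column-major layouts described above.

.. code:: cpp

   #include <algorithm>
   #include <cmath>
   #include <limits>
   #include <vector>

   // CPU reference of the kernel's math for ONE query token at ONE head.
   // Each k_blocks[b] / v_blocks[b] holds BLOCK_SIZE tokens of HEAD_SIZE
   // elements, stored token-major here for simplicity.
   std::vector<float> paged_attention_reference(
       const std::vector<float>& q,                      // [head_size]
       const std::vector<std::vector<float>>& k_blocks,  // per block: [block_size * head_size]
       const std::vector<std::vector<float>>& v_blocks,  // per block: [block_size * head_size]
       int context_len, int block_size, int head_size, float scale) {
     // QK: one scaled dot product per context token (the kernel's qk values),
     // tracking qk_max as we go (reduced across the whole thread block in the kernel).
     std::vector<float> logits(context_len);
     float qk_max = -std::numeric_limits<float>::infinity();
     for (int t = 0; t < context_len; ++t) {
       const float* k = k_blocks[t / block_size].data() + (t % block_size) * head_size;
       float qk = 0.f;
       for (int i = 0; i < head_size; ++i) qk += q[i] * k[i];
       qk *= scale;
       logits[t] = qk;
       qk_max = std::max(qk_max, qk);
     }
     // Softmax: exp(qk - qk_max) normalized by exp_sum, stored back into logits.
     float exp_sum = 0.f;
     for (float& l : logits) { l = std::exp(l - qk_max); exp_sum += l; }
     for (float& l : logits) l /= (exp_sum + 1e-6f);
     // LV: weighted sum over value tokens, accumulated per head position
     // (the role of accs), then returned (the role of the write through out_ptr).
     std::vector<float> out(head_size, 0.f);
     for (int t = 0; t < context_len; ++t) {
       const float* v = v_blocks[t / block_size].data() + (t % block_size) * head_size;
       for (int i = 0; i < head_size; ++i) out[i] += logits[t] * v[i];
     }
     return out;
   }

Comparing this sketch with the sections above makes the role of each reduction clear: ``qk_max`` and ``exp_sum`` are simply the max and sum that the single-threaded loop computes sequentially, and ``accs`` corresponds to the per-head-position accumulator in the final loop.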
diff --git a/docs/source/index.rst b/docs/source/index.rst index e90481845c4ff..c0250bf99f7ae 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -98,6 +98,7 @@ Documentation :caption: Developer Documentation dev/engine/engine_index + dev/kernel/paged_attention Indices and tables ================== From 9cbc7e5f3be72552d6041f81738921a9597643e8 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Tue, 5 Mar 2024 02:37:58 +0800 Subject: [PATCH 051/196] enable --gpu-memory-utilization in benchmark_throughput.py (#3175) Co-authored-by: zixiao --- benchmarks/benchmark_throughput.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/benchmarks/benchmark_throughput.py b/benchmarks/benchmark_throughput.py index 1f0bfe06a67cb..72bdc4b3b4540 100644 --- a/benchmarks/benchmark_throughput.py +++ b/benchmarks/benchmark_throughput.py @@ -74,6 +74,7 @@ def run_vllm( kv_cache_dtype: str, device: str, enable_prefix_caching: bool, + gpu_memory_utilization: float = 0.9, ) -> float: from vllm import LLM, SamplingParams llm = LLM(model=model, @@ -84,6 +85,7 @@ def run_vllm( trust_remote_code=trust_remote_code, dtype=dtype, max_model_len=max_model_len, + gpu_memory_utilization=gpu_memory_utilization, enforce_eager=enforce_eager, kv_cache_dtype=kv_cache_dtype, device=device, @@ -206,13 +208,12 @@ def main(args: argparse.Namespace): args.output_len) if args.backend == "vllm": - elapsed_time = run_vllm(requests, args.model, args.tokenizer, - args.quantization, args.tensor_parallel_size, - args.seed, args.n, args.use_beam_search, - args.trust_remote_code, args.dtype, - args.max_model_len, args.enforce_eager, - args.kv_cache_dtype, args.device, - args.enable_prefix_caching) + elapsed_time = run_vllm( + requests, args.model, args.tokenizer, args.quantization, + args.tensor_parallel_size, args.seed, args.n, args.use_beam_search, + args.trust_remote_code, args.dtype, args.max_model_len, + args.enforce_eager, args.kv_cache_dtype, args.device, + args.enable_prefix_caching, args.gpu_memory_utilization) elif args.backend == "hf": assert args.tensor_parallel_size == 1 elapsed_time = run_hf(requests, args.model, tokenizer, args.n, @@ -287,6 +288,12 @@ def main(args: argparse.Namespace): 'The "auto" option will use FP16 precision ' 'for FP32 and FP16 models, and BF16 precision ' 'for BF16 models.') + parser.add_argument('--gpu-memory-utilization', + type=float, + default=0.9, + help='the fraction of GPU memory to be used for ' + 'the model executor, which can range from 0 to 1.' 
+ 'If unspecified, will use the default value of 0.9.') parser.add_argument("--enforce-eager", action="store_true", help="enforce eager execution") From 76e8a70476ef9daa970349c14c117fe91e8b4544 Mon Sep 17 00:00:00 2001 From: ttbachyinsda Date: Tue, 5 Mar 2024 03:17:12 +0800 Subject: [PATCH 052/196] [Minor fix] The domain dns.google may cause a socket.gaierror exception (#3176) Co-authored-by: guofangze --- vllm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/utils.py b/vllm/utils.py index a4f9bfe6aac99..9cdf623379516 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -173,7 +173,7 @@ def get_ip() -> str: # try ipv4 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) try: - s.connect(("dns.google", 80)) # Doesn't need to be reachable + s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable return s.getsockname()[0] except OSError: # try ipv6 From 22de45235c6dd14e901e089971635ec655d5fbe0 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Mon, 4 Mar 2024 11:54:06 -0800 Subject: [PATCH 053/196] Push logprob generation to LLMEngine (#3065) Co-authored-by: Avnish Narayan --- tests/entrypoints/test_openai_server.py | 61 ++- tests/samplers/test_logprobs.py | 42 +- tests/worker/spec_decode/utils.py | 12 +- vllm/config.py | 2 + vllm/engine/arg_utils.py | 10 +- vllm/engine/async_llm_engine.py | 29 +- vllm/engine/llm_engine.py | 42 +- vllm/entrypoints/openai/serving_chat.py | 236 ++++++----- vllm/entrypoints/openai/serving_completion.py | 391 +++++++++--------- vllm/entrypoints/openai/serving_engine.py | 23 +- vllm/model_executor/layers/sampler.py | 15 +- vllm/sequence.py | 25 +- vllm/worker/spec_decode/multi_step_worker.py | 2 +- 13 files changed, 555 insertions(+), 335 deletions(-) diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index e426cf7eed72b..f4a6e44d88a87 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -213,14 +213,14 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI, messages=messages, max_tokens=10, logprobs=True, - top_logprobs=10) + top_logprobs=5) assert chat_completion.id is not None assert chat_completion.choices is not None and len( chat_completion.choices) == 1 assert chat_completion.choices[0].message is not None assert chat_completion.choices[0].logprobs is not None assert chat_completion.choices[0].logprobs.top_logprobs is not None - assert len(chat_completion.choices[0].logprobs.top_logprobs[0]) == 10 + assert len(chat_completion.choices[0].logprobs.top_logprobs[0]) == 5 message = chat_completion.choices[0].message assert message.content is not None and len(message.content) >= 10 assert message.role == "assistant" @@ -229,7 +229,7 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI, # test multi-turn dialogue messages.append({"role": "user", "content": "express your result in json"}) chat_completion = await client.chat.completions.create( - model=MODEL_NAME, + model=model_name, messages=messages, max_tokens=10, ) @@ -237,6 +237,61 @@ async def test_single_chat_session(server, client: openai.AsyncOpenAI, assert message.content is not None and len(message.content) >= 0 +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +async def test_too_many_logprobs(server, client: openai.AsyncOpenAI, + model_name: str): + messages = [{ + "role": "system", + "content": "you are a helpful assistant" + }, { + "role": "user", + "content": "what is 1+1?" 
+ }] + + # Default max_logprobs is 5, so this should raise an error + with pytest.raises((openai.BadRequestError, openai.APIError)): + stream = await client.chat.completions.create(model=model_name, + messages=messages, + max_tokens=10, + logprobs=True, + top_logprobs=10, + stream=True) + async for chunk in stream: + ... + + with pytest.raises(openai.BadRequestError): + await client.chat.completions.create(model=model_name, + messages=messages, + max_tokens=10, + logprobs=True, + top_logprobs=10, + stream=False) + + with pytest.raises((openai.BadRequestError, openai.APIError)): + stream = await client.completions.create(model=model_name, + prompt="Test", + max_tokens=10, + logprobs=10, + stream=True) + async for chunk in stream: + ... + + with pytest.raises(openai.BadRequestError): + await client.completions.create(model=model_name, + prompt="Test", + max_tokens=10, + logprobs=10, + stream=False) + + # the server should still work afterwards + chat_completion = await client.chat.completions.create(model=model_name, + messages=messages, + max_tokens=10, + stream=False) + message = chat_completion.choices[0].message + assert message.content is not None and len(message.content) >= 0 + + @pytest.mark.parametrize( # just test 1 lora hereafter "model_name", diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py index 0ea3704462fcb..1abb55f021214 100644 --- a/tests/samplers/test_logprobs.py +++ b/tests/samplers/test_logprobs.py @@ -1,5 +1,6 @@ import pytest import torch +from tests.conftest import VllmRunner from vllm import SamplingParams @@ -16,6 +17,7 @@ def test_get_prompt_logprobs( example_prompts, ): max_tokens = 5 + num_top_logprobs = 6 hf_model = hf_runner(model, dtype=dtype) hf_logprobs = hf_model.generate_greedy_logprobs( example_prompts, @@ -23,19 +25,32 @@ def test_get_prompt_logprobs( ) del hf_model - vllm_model = vllm_runner(model, dtype=dtype) + vllm_model = vllm_runner(model, dtype=dtype, max_logprobs=num_top_logprobs) vllm_sampling_params = SamplingParams(max_tokens=max_tokens, - logprobs=5, + logprobs=num_top_logprobs, prompt_logprobs=5, temperature=0.0) vllm_results = vllm_model.model.generate( example_prompts, sampling_params=vllm_sampling_params) - del vllm_model # Test whether logprobs are included in the results. 
for result in vllm_results: assert result.prompt_logprobs is not None assert result.outputs[0].logprobs is not None + assert len(result.outputs[0].logprobs) == max_tokens + for logprobs in result.outputs[0].logprobs: + assert len(logprobs) == num_top_logprobs + output_text = result.outputs[0].text + output_string_from_most_likely_tokens = [] + for top_logprobs in result.outputs[0].logprobs: + top_logprob = next(iter(top_logprobs.values())) + output_string_from_most_likely_tokens.append( + top_logprob.decoded_token) + output_string_from_most_likely_tokens = "".join( + output_string_from_most_likely_tokens) + assert output_text == output_string_from_most_likely_tokens, ( + "The output text from the top logprob for each token position " + "should be the same as the output text in the result.") # Test whether prompt logprobs are consistent with HF for vllm_result, hf_logprob in zip(vllm_results, hf_logprobs): @@ -43,14 +58,29 @@ def test_get_prompt_logprobs( vllm_prompt_logprobs = vllm_result.prompt_logprobs[1:] for i, vllm_prompt_logprob_dict in enumerate(vllm_prompt_logprobs): for token_id, logprob in vllm_prompt_logprob_dict.items(): - torch.testing.assert_close(logprob, + torch.testing.assert_close(logprob.logprob, hf_logprob[0][i][token_id].item(), atol=1e-2, rtol=1e-2) vllm_sample_logprobs = vllm_result.outputs[0].logprobs - for i, vllm_sample_logprob_dict in enumerate(vllm_sample_logprobs): - for token_id, logprob in vllm_sample_logprob_dict.items(): + for i, top_logprobs in enumerate(vllm_sample_logprobs): + for token_id, sample_logprob in top_logprobs.items(): + logprob = sample_logprob.logprob torch.testing.assert_close(logprob, hf_logprob[i][-1][token_id].item(), atol=1e-2, rtol=1e-2) + assert isinstance(sample_logprob.decoded_token, str), \ + ("The token should be decoded by the time it is returned " + " to the user.") + + +def test_max_logprobs(): + runner = VllmRunner("facebook/opt-125m", max_logprobs=1) + vllm_sampling_params = SamplingParams(logprobs=1) + # should pass + runner.generate(["Hello world"], sampling_params=vllm_sampling_params) + + bad_sampling_params = SamplingParams(logprobs=2) + with pytest.raises(ValueError): + runner.generate(["Hello world"], sampling_params=bad_sampling_params) diff --git a/tests/worker/spec_decode/utils.py b/tests/worker/spec_decode/utils.py index 8d74509fea488..fa8767cf898aa 100644 --- a/tests/worker/spec_decode/utils.py +++ b/tests/worker/spec_decode/utils.py @@ -4,7 +4,7 @@ from vllm.worker.worker import Worker from vllm.utils import get_distributed_init_method, get_ip, get_open_port from vllm.engine.arg_utils import EngineArgs -from vllm.sequence import SequenceGroupMetadata, SequenceData +from vllm.sequence import Logprob, SequenceGroupMetadata, SequenceData from vllm.sampling_params import SamplingParams from vllm.worker.cache_engine import CacheEngine from vllm.model_executor.utils import set_random_seed @@ -166,13 +166,15 @@ def create_seq_group_metadata_from_prompts( def assert_logprobs_dict_allclose( - actual_logprobs: List[Dict[int, float]], - expected_logprobs: List[Dict[int, float]]) -> None: + actual_logprobs: List[Dict[int, Logprob]], + expected_logprobs: List[Dict[int, Logprob]]) -> None: for single_step_actual_logprobs, single_step_expected_logprobs in zip( actual_logprobs, expected_logprobs): assert set(single_step_actual_logprobs.keys()) == set( single_step_expected_logprobs.keys()) for token_id in single_step_actual_logprobs: - actual = torch.tensor(single_step_actual_logprobs[token_id]) - expected = 
torch.tensor(single_step_expected_logprobs[token_id]) + actual = torch.tensor( + single_step_actual_logprobs[token_id].logprob) + expected = torch.tensor( + single_step_expected_logprobs[token_id].logprob) assert torch.allclose(actual, expected) diff --git a/vllm/config.py b/vllm/config.py index e39fd7265689f..ef9a920f29c2a 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -79,6 +79,7 @@ def __init__( quantization: Optional[str] = None, enforce_eager: bool = False, max_context_len_to_capture: Optional[int] = None, + max_logprobs: int = 5, ) -> None: self.model = model self.tokenizer = tokenizer @@ -93,6 +94,7 @@ def __init__( self.quantization = quantization self.enforce_eager = enforce_eager self.max_context_len_to_capture = max_context_len_to_capture + self.max_logprobs = max_logprobs if os.environ.get("VLLM_USE_MODELSCOPE", "False").lower() == "true": # download model from ModelScope hub, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6882e8be34d11..c3dccdd5bb50b 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -31,6 +31,7 @@ class EngineArgs: max_num_batched_tokens: Optional[int] = None max_num_seqs: int = 256 max_paddings: int = 256 + max_logprobs: int = 5 # OpenAI default value disable_log_stats: bool = False revision: Optional[str] = None code_revision: Optional[str] = None @@ -212,6 +213,12 @@ def add_cli_args( type=int, default=EngineArgs.max_paddings, help='maximum number of paddings in a batch') + parser.add_argument( + '--max-logprobs', + type=int, + default=EngineArgs.max_logprobs, + help=('max number of log probs to return logprobs is specified in' + ' SamplingParams')) parser.add_argument('--disable-log-stats', action='store_true', help='disable logging statistics') @@ -300,7 +307,8 @@ def create_engine_configs( self.trust_remote_code, self.download_dir, self.load_format, self.dtype, self.seed, self.revision, self.code_revision, self.tokenizer_revision, self.max_model_len, self.quantization, - self.enforce_eager, self.max_context_len_to_capture) + self.enforce_eager, self.max_context_len_to_capture, + self.max_logprobs) cache_config = CacheConfig(self.block_size, self.gpu_memory_utilization, self.swap_space, self.kv_cache_dtype, diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 9e52d20ca4980..df66139fddcd1 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -47,7 +47,7 @@ def __init__(self, request_id: str) -> None: self._queue = asyncio.Queue() self._finished = False - def put(self, item: RequestOutput) -> None: + def put(self, item: Union[RequestOutput, Exception]) -> None: if self._finished: return self._queue.put_nowait(item) @@ -110,6 +110,17 @@ def process_request_output(self, logger.info(f"Finished request {request_id}.") self.abort_request(request_id) + def process_exception(self, + request_id: str, + exception: Exception, + *, + verbose: bool = False) -> None: + """Propagate an exception from the engine.""" + self._request_streams[request_id].put(exception) + if verbose: + logger.info(f"Finished request {request_id}.") + self.abort_request(request_id) + def add_request(self, request_id: str, **engine_add_request_kwargs) -> AsyncStream: """Add a request to be sent to the engine on the next background @@ -377,10 +388,18 @@ async def engine_step(self) -> bool: for new_request in new_requests: # Add the request into the vLLM engine's waiting queue. 
# TODO: Maybe add add_request_batch to reduce Ray overhead - if self.engine_use_ray: - await self.engine.add_request.remote(**new_request) - else: - await self.engine.add_request_async(**new_request) + try: + if self.engine_use_ray: + await self.engine.add_request.remote(**new_request) + else: + await self.engine.add_request_async(**new_request) + except ValueError as e: + # TODO: use a vLLM specific error for failed validation + self._request_tracker.process_exception( + new_request["request_id"], + e, + verbose=self.log_requests, + ) if finished_requests: await self._engine_abort(finished_requests) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 8a2573034c940..703756996b7f7 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -18,7 +18,7 @@ from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams -from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup, +from vllm.sequence import (Logprob, SamplerOutput, Sequence, SequenceGroup, SequenceGroupOutput, SequenceOutput, SequenceStatus) from vllm.transformers_utils.tokenizer import (detokenize_incrementally, TokenizerGroup) @@ -473,6 +473,13 @@ def add_request( if lora_request is not None and not self.lora_config: raise ValueError(f"Got lora_request {lora_request} but LoRA is " "not enabled!") + max_logprobs = self.get_model_config().max_logprobs + if (sampling_params.logprobs + and sampling_params.logprobs > max_logprobs) or ( + sampling_params.prompt_logprobs + and sampling_params.prompt_logprobs > max_logprobs): + raise ValueError(f"Cannot request more than " + f"{max_logprobs} logprobs.") if arrival_time is None: arrival_time = time.monotonic() prompt_token_ids = self.encode_request( @@ -583,6 +590,13 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup, # Process prompt logprobs prompt_logprobs = outputs.prompt_logprobs if prompt_logprobs is not None: + # We can pick any sequence for the prompt. + seq = next(iter(seq_group.seqs_dict.values())) + all_token_ids = seq.get_token_ids() + for i, prompt_logprobs_for_token in enumerate(prompt_logprobs): + self._decode_logprobs(seq, seq_group.sampling_params, + prompt_logprobs_for_token, + all_token_ids[:i]) seq_group.prompt_logprobs = prompt_logprobs # Process samples @@ -930,12 +944,36 @@ def _get_stats(self, time_e2e_requests=time_e2e_requests, ) + def _decode_logprobs(self, seq: Sequence, prms: SamplingParams, + logprobs: Dict[int, Logprob], + all_input_ids: List[int]) -> None: + if not logprobs: + return + for token_id, sample_logprob in logprobs.items(): + if (sample_logprob.decoded_token is None and token_id != -1): + all_input_ids_with_logprob = all_input_ids[:-1] + [token_id] + _, new_text, prefix_offset, read_offset = detokenize_incrementally( + self.get_tokenizer_for_seq(seq), + all_input_ids=all_input_ids_with_logprob, + prev_tokens=seq.tokens, + prefix_offset=seq.prefix_offset, + read_offset=seq.read_offset, + skip_special_tokens=prms.skip_special_tokens, + spaces_between_special_tokens=prms. 
+ spaces_between_special_tokens, + ) + sample_logprob.decoded_token = new_text + def _decode_sequence(self, seq: Sequence, prms: SamplingParams) -> None: """Decodes the new token for a sequence.""" + all_input_ids = seq.get_token_ids() + self._decode_logprobs(seq, prms, seq.output_logprobs[-1], + all_input_ids) + (new_tokens, new_output_text, prefix_offset, read_offset) = detokenize_incrementally( self.get_tokenizer_for_seq(seq), - all_input_ids=seq.get_token_ids(), + all_input_ids=all_input_ids, prev_tokens=seq.tokens, prefix_offset=seq.prefix_offset, read_offset=seq.read_offset, diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index f4ad0aa5a0184..ba352f18f6454 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -82,8 +82,12 @@ async def create_chat_completion( return self.chat_completion_stream_generator( request, result_generator, request_id) else: - return await self.chat_completion_full_generator( - request, raw_request, result_generator, request_id) + try: + return await self.chat_completion_full_generator( + request, raw_request, result_generator, request_id) + except ValueError as e: + # TODO: Use a vllm-specific Validation Error + return self.create_error_response(str(e)) def get_chat_request_role(self, request: ChatCompletionRequest) -> str: if request.add_generation_prompt: @@ -99,117 +103,133 @@ async def chat_completion_stream_generator( model_name = request.model created_time = int(time.monotonic()) chunk_object_type = "chat.completion.chunk" - - # Send first response for each request.n (index) with the role - role = self.get_chat_request_role(request) - for i in range(request.n): - choice_data = ChatCompletionResponseStreamChoice( - index=i, - delta=DeltaMessage(role=role), - logprobs=None, - finish_reason=None) - chunk = ChatCompletionStreamResponse(id=request_id, - object=chunk_object_type, - created=created_time, - choices=[choice_data], - model=model_name) - data = chunk.model_dump_json(exclude_unset=True) - yield f"data: {data}\n\n" - - # Send response to echo the input portion of the last message - if request.echo: - last_msg_content = "" - if request.messages and isinstance( - request.messages, list) and request.messages[-1].get( - "content") and request.messages[-1].get( - "role") == role: - last_msg_content = request.messages[-1]["content"] - - if last_msg_content: - for i in range(request.n): - choice_data = ChatCompletionResponseStreamChoice( - index=i, - delta=DeltaMessage(content=last_msg_content), - finish_reason=None) - chunk = ChatCompletionStreamResponse( - id=request_id, - object=chunk_object_type, - created=created_time, - choices=[choice_data], - logprobs=None, - model=model_name) - data = chunk.model_dump_json(exclude_unset=True) - yield f"data: {data}\n\n" + first_iteration = True # Send response for each token for each request.n (index) previous_texts = [""] * request.n previous_num_tokens = [0] * request.n finish_reason_sent = [False] * request.n - async for res in result_generator: - res: RequestOutput - for output in res.outputs: - i = output.index - - if finish_reason_sent[i]: - continue - - delta_token_ids = output.token_ids[previous_num_tokens[i]:] - top_logprobs = output.logprobs[ - previous_num_tokens[i]:] if output.logprobs else None - - if request.logprobs: - logprobs = self._create_logprobs( - token_ids=delta_token_ids, - top_logprobs=top_logprobs, - num_output_top_logprobs=request.logprobs, - initial_text_offset=len(previous_texts[i]), - ) - 
else: - logprobs = None - - delta_text = output.text[len(previous_texts[i]):] - previous_texts[i] = output.text - previous_num_tokens[i] = len(output.token_ids) - if output.finish_reason is None: - # Send token-by-token response for each request.n - choice_data = ChatCompletionResponseStreamChoice( - index=i, - delta=DeltaMessage(content=delta_text), - logprobs=logprobs, - finish_reason=None) - chunk = ChatCompletionStreamResponse( - id=request_id, - object=chunk_object_type, - created=created_time, - choices=[choice_data], - model=model_name) - data = chunk.model_dump_json(exclude_unset=True) - yield f"data: {data}\n\n" - else: - # Send the finish response for each request.n only once - prompt_tokens = len(res.prompt_token_ids) - final_usage = UsageInfo( - prompt_tokens=prompt_tokens, - completion_tokens=previous_num_tokens[i], - total_tokens=prompt_tokens + previous_num_tokens[i], - ) - choice_data = ChatCompletionResponseStreamChoice( - index=i, - delta=DeltaMessage(content=delta_text), - logprobs=logprobs, - finish_reason=output.finish_reason) - chunk = ChatCompletionStreamResponse( - id=request_id, - object=chunk_object_type, - created=created_time, - choices=[choice_data], - model=model_name) - if final_usage is not None: - chunk.usage = final_usage - data = chunk.model_dump_json(exclude_unset=True, - exclude_none=True) - yield f"data: {data}\n\n" - finish_reason_sent[i] = True + try: + async for res in result_generator: + res: RequestOutput + # We need to do it here, because if there are exceptions in + # the result_generator, it needs to be sent as the FIRST + # response (by the try...catch). + if first_iteration: + # Send first response for each request.n (index) with the role + role = self.get_chat_request_role(request) + for i in range(request.n): + choice_data = ChatCompletionResponseStreamChoice( + index=i, + delta=DeltaMessage(role=role), + logprobs=None, + finish_reason=None) + chunk = ChatCompletionStreamResponse( + id=request_id, + object=chunk_object_type, + created=created_time, + choices=[choice_data], + model=model_name) + data = chunk.model_dump_json(exclude_unset=True) + yield f"data: {data}\n\n" + + # Send response to echo the input portion of the last message + if request.echo: + last_msg_content = "" + if request.messages and isinstance( + request.messages, + list) and request.messages[-1].get( + "content") and request.messages[-1].get( + "role") == role: + last_msg_content = request.messages[-1]["content"] + + if last_msg_content: + for i in range(request.n): + choice_data = ChatCompletionResponseStreamChoice( + index=i, + delta=DeltaMessage( + content=last_msg_content), + finish_reason=None) + chunk = ChatCompletionStreamResponse( + id=request_id, + object=chunk_object_type, + created=created_time, + choices=[choice_data], + logprobs=None, + model=model_name) + data = chunk.model_dump_json( + exclude_unset=True) + yield f"data: {data}\n\n" + first_iteration = False + + for output in res.outputs: + i = output.index + + if finish_reason_sent[i]: + continue + + delta_token_ids = output.token_ids[previous_num_tokens[i]:] + top_logprobs = output.logprobs[ + previous_num_tokens[i]:] if output.logprobs else None + + if request.logprobs: + logprobs = self._create_logprobs( + token_ids=delta_token_ids, + top_logprobs=top_logprobs, + num_output_top_logprobs=request.logprobs, + initial_text_offset=len(previous_texts[i]), + ) + else: + logprobs = None + + delta_text = output.text[len(previous_texts[i]):] + previous_texts[i] = output.text + previous_num_tokens[i] = 
len(output.token_ids) + if output.finish_reason is None: + # Send token-by-token response for each request.n + choice_data = ChatCompletionResponseStreamChoice( + index=i, + delta=DeltaMessage(content=delta_text), + logprobs=logprobs, + finish_reason=None) + chunk = ChatCompletionStreamResponse( + id=request_id, + object=chunk_object_type, + created=created_time, + choices=[choice_data], + model=model_name) + data = chunk.model_dump_json(exclude_unset=True) + yield f"data: {data}\n\n" + else: + # Send the finish response for each request.n only once + prompt_tokens = len(res.prompt_token_ids) + final_usage = UsageInfo( + prompt_tokens=prompt_tokens, + completion_tokens=previous_num_tokens[i], + total_tokens=prompt_tokens + + previous_num_tokens[i], + ) + choice_data = ChatCompletionResponseStreamChoice( + index=i, + delta=DeltaMessage(content=delta_text), + logprobs=logprobs, + finish_reason=output.finish_reason) + chunk = ChatCompletionStreamResponse( + id=request_id, + object=chunk_object_type, + created=created_time, + choices=[choice_data], + model=model_name) + if final_usage is not None: + chunk.usage = final_usage + data = chunk.model_dump_json(exclude_unset=True, + exclude_none=True) + yield f"data: {data}\n\n" + finish_reason_sent[i] = True + except ValueError as e: + # TODO: Use a vllm-specific Validation Error + data = self.create_streaming_error_response(str(e)) + yield f"data: {data}\n\n" # Send the final done message after all response.n are finished yield "data: [DONE]\n\n" diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 99a10196b5f73..a8244fd150753 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -26,107 +26,6 @@ [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs] -async def completion_stream_generator( - request: CompletionRequest, - raw_request: Request, - on_abort, - result_generator: AsyncIterator[Tuple[int, RequestOutput]], - create_logprobs_fn: TypeCreateLogProbsFn, - request_id: str, - created_time: int, - model_name: str, - num_prompts: int, -) -> AsyncGenerator[str, None]: - previous_texts = [""] * request.n * num_prompts - previous_num_tokens = [0] * request.n * num_prompts - has_echoed = [False] * request.n * num_prompts - - async for prompt_idx, res in result_generator: - - # Abort the request if the client disconnects. - if await raw_request.is_disconnected(): - await on_abort(f"{request_id}-{prompt_idx}") - raise StopAsyncIteration() - - for output in res.outputs: - i = output.index + prompt_idx * request.n - # TODO(simon): optimize the performance by avoiding full text O(n^2) sending. 
- - if request.echo and request.max_tokens == 0: - # only return the prompt - delta_text = res.prompt - delta_token_ids = res.prompt_token_ids - top_logprobs = res.prompt_logprobs - has_echoed[i] = True - elif request.echo and request.max_tokens > 0 and not has_echoed[i]: - # echo the prompt and first token - delta_text = res.prompt + output.text - delta_token_ids = res.prompt_token_ids + output.token_ids - top_logprobs = res.prompt_logprobs + (output.logprobs or []) - has_echoed[i] = True - else: - # return just the delta - delta_text = output.text[len(previous_texts[i]):] - delta_token_ids = output.token_ids[previous_num_tokens[i]:] - top_logprobs = output.logprobs[ - previous_num_tokens[i]:] if output.logprobs else None - - if request.logprobs is not None: - assert top_logprobs is not None, "top_logprobs must be provided when logprobs is requested" - logprobs = create_logprobs_fn( - token_ids=delta_token_ids, - top_logprobs=top_logprobs, - num_output_top_logprobs=request.logprobs, - initial_text_offset=len(previous_texts[i]), - ) - else: - logprobs = None - - previous_texts[i] = output.text - previous_num_tokens[i] = len(output.token_ids) - finish_reason = output.finish_reason - response_json = CompletionStreamResponse( - id=request_id, - created=created_time, - model=model_name, - choices=[ - CompletionResponseStreamChoice( - index=i, - text=delta_text, - logprobs=logprobs, - finish_reason=finish_reason, - ) - ]).model_dump_json() - yield f"data: {response_json}\n\n" - - if output.finish_reason is not None: # return final usage - logprobs = LogProbs() if request.logprobs is not None else None - prompt_tokens = len(res.prompt_token_ids) - completion_tokens = len(output.token_ids) - final_usage = UsageInfo( - prompt_tokens=prompt_tokens, - completion_tokens=completion_tokens, - total_tokens=prompt_tokens + completion_tokens, - ) - response_json = CompletionStreamResponse( - id=request_id, - created=created_time, - model=model_name, - choices=[ - CompletionResponseStreamChoice( - index=i, - text="", - logprobs=logprobs, - finish_reason=output.finish_reason, - ) - ], - usage=final_usage, - ).model_dump_json() - yield f"data: {response_json}\n\n" - - yield "data: [DONE]\n\n" - - def parse_prompt_format(prompt) -> Tuple[bool, list]: # get the prompt, openai supports the following # "a string, array of strings, array of tokens, or array of token arrays." 
@@ -151,73 +50,6 @@ def parse_prompt_format(prompt) -> Tuple[bool, list]: return prompt_is_tokens, prompts -def request_output_to_completion_response( - final_res_batch: List[RequestOutput], - request: CompletionRequest, - create_logprobs_fn: TypeCreateLogProbsFn, - request_id: str, - created_time: int, - model_name: str, -) -> CompletionResponse: - choices = [] - num_prompt_tokens = 0 - num_generated_tokens = 0 - for final_res in final_res_batch: - assert final_res is not None - prompt_token_ids = final_res.prompt_token_ids - prompt_logprobs = final_res.prompt_logprobs - prompt_text = final_res.prompt - - for output in final_res.outputs: - if request.echo and request.max_tokens == 0: - token_ids = prompt_token_ids - top_logprobs = prompt_logprobs - output_text = prompt_text - elif request.echo and request.max_tokens > 0: - token_ids = prompt_token_ids + output.token_ids - top_logprobs = prompt_logprobs + output.logprobs - output_text = prompt_text + output.text - else: - token_ids = output.token_ids - top_logprobs = output.logprobs - output_text = output.text - - if request.logprobs is not None: - logprobs = create_logprobs_fn( - token_ids=token_ids, - top_logprobs=top_logprobs, - num_output_top_logprobs=request.logprobs, - ) - else: - logprobs = None - - choice_data = CompletionResponseChoice( - index=len(choices), - text=output_text, - logprobs=logprobs, - finish_reason=output.finish_reason, - ) - choices.append(choice_data) - - num_prompt_tokens += len(prompt_token_ids) - num_generated_tokens += sum( - len(output.token_ids) for output in final_res.outputs) - - usage = UsageInfo( - prompt_tokens=num_prompt_tokens, - completion_tokens=num_generated_tokens, - total_tokens=num_prompt_tokens + num_generated_tokens, - ) - - return CompletionResponse( - id=request_id, - created=created_time, - model=model_name, - choices=choices, - usage=usage, - ) - - def merge_async_iterators(*iterators): """Merge multiple asynchronous iterators into a single iterator. @@ -230,8 +62,11 @@ def merge_async_iterators(*iterators): finished = [False] * len(iterators) async def producer(i, iterator): - async for item in iterator: - await queue.put((i, item)) + try: + async for item in iterator: + await queue.put((i, item)) + except Exception as e: + await queue.put(e) finished[i] = True _tasks = [ @@ -242,6 +77,8 @@ async def producer(i, iterator): async def consumer(): while not all(finished) or not queue.empty(): item = await queue.get() + if isinstance(item, Exception): + raise item yield item await asyncio.gather(*_tasks) @@ -312,6 +149,7 @@ async def create_completion(self, request: CompletionRequest, prompt_token_ids=input_ids, lora_request=lora_request)) except ValueError as e: + # TODO: Use a vllm-specific Validation Error return self.create_error_response(str(e)) result_generator: AsyncIterator[Tuple[ @@ -325,27 +163,28 @@ async def create_completion(self, request: CompletionRequest, # Streaming response if stream: - return completion_stream_generator(request, - raw_request, - self.engine.abort, - result_generator, - self._create_logprobs, - request_id, - created_time, - model_name, - num_prompts=len(prompts)) + return self.completion_stream_generator(request, + raw_request, + result_generator, + request_id, + created_time, + model_name, + num_prompts=len(prompts)) # Non-streaming response final_res_batch: RequestOutput = [None] * len(prompts) - async for i, res in result_generator: - if await raw_request.is_disconnected(): - # Abort the request if the client disconnects. 
- await self.engine.abort(f"{request_id}-{i}") - return self.create_error_response("Client disconnected") - final_res_batch[i] = res - response = request_output_to_completion_response( - final_res_batch, request, self._create_logprobs, request_id, - created_time, model_name) + try: + async for i, res in result_generator: + if await raw_request.is_disconnected(): + # Abort the request if the client disconnects. + await self.engine.abort(f"{request_id}-{i}") + return self.create_error_response("Client disconnected") + final_res_batch[i] = res + response = self.request_output_to_completion_response( + final_res_batch, request, request_id, created_time, model_name) + except ValueError as e: + # TODO: Use a vllm-specific Validation Error + return self.create_error_response(str(e)) # When user requests streaming but we don't stream, we still need to # return a streaming response with a single event. @@ -359,3 +198,179 @@ async def fake_stream_generator() -> AsyncGenerator[str, None]: return fake_stream_generator() return response + + async def completion_stream_generator( + self, + request: CompletionRequest, + raw_request: Request, + result_generator: AsyncIterator[Tuple[int, RequestOutput]], + request_id: str, + created_time: int, + model_name: str, + num_prompts: int, + ) -> AsyncGenerator[str, None]: + previous_texts = [""] * request.n * num_prompts + previous_num_tokens = [0] * request.n * num_prompts + has_echoed = [False] * request.n * num_prompts + + try: + async for prompt_idx, res in result_generator: + + # Abort the request if the client disconnects. + if await raw_request.is_disconnected(): + await self.engine.abort(f"{request_id}-{prompt_idx}") + raise StopAsyncIteration() + + for output in res.outputs: + i = output.index + prompt_idx * request.n + # TODO(simon): optimize the performance by avoiding full text O(n^2) sending. 
+ + if request.echo and request.max_tokens == 0: + # only return the prompt + delta_text = res.prompt + delta_token_ids = res.prompt_token_ids + top_logprobs = res.prompt_logprobs + has_echoed[i] = True + elif request.echo and request.max_tokens > 0 and not has_echoed[ + i]: + # echo the prompt and first token + delta_text = res.prompt + output.text + delta_token_ids = res.prompt_token_ids + output.token_ids + top_logprobs = res.prompt_logprobs + (output.logprobs + or []) + has_echoed[i] = True + else: + # return just the delta + delta_text = output.text[len(previous_texts[i]):] + delta_token_ids = output.token_ids[ + previous_num_tokens[i]:] + top_logprobs = output.logprobs[previous_num_tokens[ + i]:] if output.logprobs else None + + if request.logprobs is not None: + assert top_logprobs is not None, "top_logprobs must be provided when logprobs is requested" + logprobs = self._create_logprobs( + token_ids=delta_token_ids, + top_logprobs=top_logprobs, + num_output_top_logprobs=request.logprobs, + initial_text_offset=len(previous_texts[i]), + ) + else: + logprobs = None + + previous_texts[i] = output.text + previous_num_tokens[i] = len(output.token_ids) + finish_reason = output.finish_reason + response_json = CompletionStreamResponse( + id=request_id, + created=created_time, + model=model_name, + choices=[ + CompletionResponseStreamChoice( + index=i, + text=delta_text, + logprobs=logprobs, + finish_reason=finish_reason, + ) + ]).model_dump_json() + yield f"data: {response_json}\n\n" + + if output.finish_reason is not None: # return final usage + logprobs = LogProbs( + ) if request.logprobs is not None else None + prompt_tokens = len(res.prompt_token_ids) + completion_tokens = len(output.token_ids) + final_usage = UsageInfo( + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ) + response_json = CompletionStreamResponse( + id=request_id, + created=created_time, + model=model_name, + choices=[ + CompletionResponseStreamChoice( + index=i, + text="", + logprobs=logprobs, + finish_reason=output.finish_reason, + ) + ], + usage=final_usage, + ).model_dump_json() + yield f"data: {response_json}\n\n" + except ValueError as e: + # TODO: Use a vllm-specific Validation Error + data = self.create_streaming_error_response(str(e)) + print("yield", f"data: {data}\n\n") + yield f"data: {data}\n\n" + + print("yield", "data: [DONE]\n\n") + yield "data: [DONE]\n\n" + + def request_output_to_completion_response( + self, + final_res_batch: List[RequestOutput], + request: CompletionRequest, + request_id: str, + created_time: int, + model_name: str, + ) -> CompletionResponse: + choices = [] + num_prompt_tokens = 0 + num_generated_tokens = 0 + for final_res in final_res_batch: + assert final_res is not None + prompt_token_ids = final_res.prompt_token_ids + prompt_logprobs = final_res.prompt_logprobs + prompt_text = final_res.prompt + + for output in final_res.outputs: + if request.echo and request.max_tokens == 0: + token_ids = prompt_token_ids + top_logprobs = prompt_logprobs + output_text = prompt_text + elif request.echo and request.max_tokens > 0: + token_ids = prompt_token_ids + output.token_ids + top_logprobs = prompt_logprobs + output.logprobs + output_text = prompt_text + output.text + else: + token_ids = output.token_ids + top_logprobs = output.logprobs + output_text = output.text + + if request.logprobs is not None: + logprobs = self._create_logprobs( + token_ids=token_ids, + top_logprobs=top_logprobs, + 
num_output_top_logprobs=request.logprobs, + ) + else: + logprobs = None + + choice_data = CompletionResponseChoice( + index=len(choices), + text=output_text, + logprobs=logprobs, + finish_reason=output.finish_reason, + ) + choices.append(choice_data) + + num_prompt_tokens += len(prompt_token_ids) + num_generated_tokens += sum( + len(output.token_ids) for output in final_res.outputs) + + usage = UsageInfo( + prompt_tokens=num_prompt_tokens, + completion_tokens=num_generated_tokens, + total_tokens=num_prompt_tokens + num_generated_tokens, + ) + + return CompletionResponse( + id=request_id, + created=created_time, + model=model_name, + choices=choices, + usage=usage, + ) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 09945471e9af0..230d13d97dbba 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1,4 +1,5 @@ import asyncio +import json from dataclasses import dataclass from http import HTTPStatus from typing import Dict, List, Optional, Union @@ -11,6 +12,7 @@ ModelCard, ModelList, ModelPermission) from vllm.lora.request import LoRARequest +from vllm.sequence import Logprob logger = init_logger(__name__) @@ -83,7 +85,7 @@ async def show_available_models(self) -> ModelList: def _create_logprobs( self, token_ids: List[int], - top_logprobs: Optional[List[Optional[Dict[int, float]]]] = None, + top_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None, num_output_top_logprobs: Optional[int] = None, initial_text_offset: int = 0, ) -> LogProbs: @@ -95,10 +97,10 @@ def _create_logprobs( for i, token_id in enumerate(token_ids): step_top_logprobs = top_logprobs[i] if step_top_logprobs is not None: - token_logprob = step_top_logprobs[token_id] + token_logprob = step_top_logprobs[token_id].logprob else: token_logprob = None - token = self.tokenizer.convert_ids_to_tokens(token_id) + token = step_top_logprobs[token_id].decoded_token logprobs.tokens.append(token) logprobs.token_logprobs.append(token_logprob) if len(logprobs.text_offset) == 0: @@ -110,7 +112,7 @@ def _create_logprobs( if num_output_top_logprobs: logprobs.top_logprobs.append({ - self.tokenizer.convert_ids_to_tokens(i): p + p.decoded_token: p.logprob for i, p in step_top_logprobs.items() } if step_top_logprobs else None) return logprobs @@ -124,6 +126,19 @@ def create_error_response( type=err_type, code=status_code.value) + def create_streaming_error_response( + self, + message: str, + err_type: str = "BadRequestError", + status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> str: + json_str = json.dumps({ + "error": + self.create_error_response(message=message, + err_type=err_type, + status_code=status_code).model_dump() + }) + return json_str + async def _check_model(self, request) -> Optional[ErrorResponse]: if request.model == self.served_model: return diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 71655b216fb3d..b48dde0318d09 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -8,8 +8,9 @@ tensor_model_parallel_gather) from vllm.model_executor.sampling_metadata import SamplingMetadata, SamplingTensors from vllm.sampling_params import SamplingParams, SamplingType -from vllm.sequence import (PromptLogprobs, SampleLogprobs, SamplerOutput, - SequenceData, SequenceGroupOutput, SequenceOutput) +from vllm.sequence import (Logprob, PromptLogprobs, SampleLogprobs, + SamplerOutput, SequenceData, SequenceGroupOutput, + SequenceOutput) 
from vllm.utils import is_neuron @@ -528,7 +529,10 @@ def _get_logprobs( prompt_logprobs_dict.update( zip(top_token_ids[sample_idx, :num_logprobs].tolist(), top_logprobs[sample_idx, :num_logprobs].tolist())) - group_prompt_logprobs.append(prompt_logprobs_dict) + group_prompt_logprobs.append({ + token_id: Logprob(logprob) + for token_id, logprob in prompt_logprobs_dict.items() + }) sample_idx += 1 query_result_idx += 1 result_prompt_logprobs.append(group_prompt_logprobs) @@ -553,7 +557,10 @@ def _get_logprobs( parent_id, :num_logprobs].tolist(), top_logprobs[sample_idx + parent_id, :num_logprobs].tolist())) - group_sample_logprobs.append(sample_logprobs_dict) + group_sample_logprobs.append({ + token_id: Logprob(logprob) + for token_id, logprob in sample_logprobs_dict.items() + }) result_sample_logprobs.append(group_sample_logprobs) sample_idx += len(seq_ids) diff --git a/vllm/sequence.py b/vllm/sequence.py index 04a9a90a68bcc..a110ab6b748f8 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -8,8 +8,16 @@ from vllm.sampling_params import SamplingParams from vllm.lora.request import LoRARequest -PromptLogprobs = List[Optional[Dict[int, float]]] -SampleLogprobs = List[Dict[int, float]] + +@dataclass +class Logprob: + """Infos for supporting OpenAI compatible logprobs.""" + logprob: float + decoded_token: Optional[str] = None + + +PromptLogprobs = List[Optional[Dict[int, Logprob]]] +SampleLogprobs = List[Dict[int, Logprob]] class SequenceStatus(enum.Enum): @@ -196,12 +204,12 @@ def _append_tokens_to_blocks(self, token_ids: List[int]) -> None: def append_token_id( self, token_id: int, - logprobs: Dict[int, float], + logprobs: Dict[int, Logprob], ) -> None: assert token_id in logprobs self._append_tokens_to_blocks([token_id]) self.output_logprobs.append(logprobs) - self.data.append_token_id(token_id, logprobs[token_id]) + self.data.append_token_id(token_id, logprobs[token_id].logprob) def get_len(self) -> int: return self.data.get_len() @@ -456,7 +464,7 @@ def __init__( self, parent_seq_id: int, output_token: int, - logprobs: Dict[int, float], + logprobs: Dict[int, Logprob], ) -> None: self.parent_seq_id = parent_seq_id self.output_token = output_token @@ -470,9 +478,10 @@ def __repr__(self) -> str: def __eq__(self, other: object) -> bool: if not isinstance(other, SequenceOutput): raise NotImplementedError() - return (self.parent_seq_id == other.parent_seq_id - and self.output_token == other.output_token - and self.logprobs == other.logprobs) + equal = (self.parent_seq_id == other.parent_seq_id + and self.output_token == other.output_token) + log_probs_equal = other.logprobs == self.logprobs + return equal and log_probs_equal class SequenceGroupOutput: diff --git a/vllm/worker/spec_decode/multi_step_worker.py b/vllm/worker/spec_decode/multi_step_worker.py index 591d1b1300c88..ab3e28389a04c 100644 --- a/vllm/worker/spec_decode/multi_step_worker.py +++ b/vllm/worker/spec_decode/multi_step_worker.py @@ -77,7 +77,7 @@ def _append_new_tokens( token_id = seq_output.output_token token_logprob = seq_output.logprobs[token_id] - seq.append_token_id(token_id, token_logprob) + seq.append_token_id(token_id, token_logprob.logprob) def _shallow_copy_inputs( self, seq_group_metadata_list: List[SequenceGroupMetadata] From ff578cae54d23812b53b6c9b94b8bd0bb293a1fe Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Mon, 4 Mar 2024 14:01:40 -0800 Subject: [PATCH 054/196] Add health check, make async Engine more robust (#3015) Co-authored-by: Zhuohan Li --- tests/async_engine/test_async_llm_engine.py | 32 +++--- 
tests/async_engine/test_request_tracker.py | 38 +++---- vllm/engine/async_llm_engine.py | 113 +++++++++++++++----- vllm/engine/llm_engine.py | 20 ++++ 4 files changed, 138 insertions(+), 65 deletions(-) diff --git a/tests/async_engine/test_async_llm_engine.py b/tests/async_engine/test_async_llm_engine.py index 1edb19c550010..1e31ff7373031 100644 --- a/tests/async_engine/test_async_llm_engine.py +++ b/tests/async_engine/test_async_llm_engine.py @@ -25,12 +25,8 @@ async def step_async(self): return [RequestOutput( request_id=self.request_id)] if self.request_id else [] - async def encode_request_async( - self, - *args, - **kwargs, - ): - return [1] + async def encode_request_async(self, *args, **kwargs): + pass def generate(self, request_id): self.request_id = request_id @@ -43,13 +39,16 @@ def add_request(self, **kwargs): self.add_request_calls += 1 async def add_request_async(self, **kwargs): - del kwargs # Unused self.add_request_calls += 1 + return def abort_request(self, request_id): del request_id # Unused self.abort_request_calls += 1 + def has_unfinished_requests(self): + return self.request_id is not None + class MockAsyncLLMEngine(AsyncLLMEngine): @@ -72,20 +71,21 @@ async def test_new_requests_event(): await engine.add_request("2", "", None) engine.engine.generate("2") await asyncio.sleep(0) - assert engine.engine.add_request_calls == 2 - assert engine.engine.step_calls == 2 await asyncio.sleep(0) - assert engine.engine.step_calls == 3 + assert engine.engine.add_request_calls == 2 + assert engine.engine.step_calls >= 2 + await asyncio.sleep(0.001) + assert engine.engine.step_calls >= 3 engine.engine.stop_generating() - await asyncio.sleep(0) - assert engine.engine.step_calls == 4 - await asyncio.sleep(0) - assert engine.engine.step_calls == 4 + await asyncio.sleep(0.001) + old_step_calls = engine.engine.step_calls + await asyncio.sleep(0.001) + assert engine.engine.step_calls == old_step_calls await engine.add_request("3", "", None) await asyncio.sleep(0.01) assert engine.engine.add_request_calls == 3 - assert engine.engine.step_calls == 5 + assert engine.engine.step_calls == old_step_calls + 1 await asyncio.sleep(0.01) assert engine.engine.add_request_calls == 3 - assert engine.engine.step_calls == 5 + assert engine.engine.step_calls == old_step_calls + 1 diff --git a/tests/async_engine/test_request_tracker.py b/tests/async_engine/test_request_tracker.py index 4043558bae919..7b1f4a9e1eb2f 100644 --- a/tests/async_engine/test_request_tracker.py +++ b/tests/async_engine/test_request_tracker.py @@ -4,25 +4,14 @@ from vllm.outputs import RequestOutput -class DummyEvent: - - def __init__(self): - self.flag = False - - def set(self): - self.flag = True - - def clear(self): - self.flag = False - - -def test_request_tracker(): +@pytest.mark.asyncio +async def test_request_tracker(): tracker = RequestTracker() - tracker.new_requests_event = DummyEvent() stream_1 = tracker.add_request("1") - assert tracker.new_requests_event.flag + assert tracker.new_requests_event.is_set() + await tracker.wait_for_new_requests() new, finished = tracker.get_new_and_finished_requests() - assert not tracker.new_requests_event.flag + assert not tracker.new_requests_event.is_set() assert len(new) == 1 assert new[0]["request_id"] == "1" assert not finished @@ -30,9 +19,10 @@ def test_request_tracker(): stream_2 = tracker.add_request("2") stream_3 = tracker.add_request("3") - assert tracker.new_requests_event.flag + assert tracker.new_requests_event.is_set() + await tracker.wait_for_new_requests() new, finished 
= tracker.get_new_and_finished_requests() - assert not tracker.new_requests_event.flag + assert not tracker.new_requests_event.is_set() assert len(new) == 2 assert new[0]["request_id"] == "2" assert new[1]["request_id"] == "3" @@ -43,7 +33,7 @@ def test_request_tracker(): # request_ids must be unique with pytest.raises(KeyError): tracker.add_request("1") - assert not tracker.new_requests_event.flag + assert not tracker.new_requests_event.is_set() tracker.abort_request("1") new, finished = tracker.get_new_and_finished_requests() @@ -54,7 +44,8 @@ def test_request_tracker(): stream_4 = tracker.add_request("4") tracker.abort_request("4") - assert tracker.new_requests_event.flag + assert tracker.new_requests_event.is_set() + await tracker.wait_for_new_requests() new, finished = tracker.get_new_and_finished_requests() assert len(finished) == 1 assert "4" in finished @@ -62,11 +53,12 @@ def test_request_tracker(): assert stream_4.finished stream_5 = tracker.add_request("5") - assert tracker.new_requests_event.flag + assert tracker.new_requests_event.is_set() tracker.process_request_output( - RequestOutput("2", "output", [], [], [], bool(finished))) + RequestOutput("2", "output", [], [], [], finished=True)) + await tracker.wait_for_new_requests() new, finished = tracker.get_new_and_finished_requests() - assert not tracker.new_requests_event.flag + assert not tracker.new_requests_event.is_set() assert len(finished) == 1 assert "2" in finished assert len(new) == 1 diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index df66139fddcd1..65ab0c0634176 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1,8 +1,9 @@ import asyncio +import os import time from functools import partial from typing import (Any, Dict, Iterable, List, Optional, Set, Tuple, Type, - Union, AsyncIterator) + Union, AsyncIterator, Callable) from vllm.lora.request import LoRARequest from vllm.config import ModelConfig @@ -14,28 +15,31 @@ from vllm.sampling_params import SamplingParams logger = init_logger(__name__) +ENGINE_ITERATION_TIMEOUT_S = int( + os.environ.get("VLLM_ENGINE_ITERATION_TIMEOUT_S", "60")) class AsyncEngineDeadError(RuntimeError): pass -def _raise_exception_on_finish(task: asyncio.Task, - request_tracker: "RequestTracker") -> None: +def _raise_exception_on_finish( + task: asyncio.Task, error_callback: Callable[[Exception], + None]) -> None: msg = ("Task finished unexpectedly. This should never happen! 
" "Please open an issue on Github.") + + exception = None try: - try: - task.result() - except asyncio.CancelledError: - return - except Exception as exc: - raise AsyncEngineDeadError( - msg + " See stack trace above for the actual cause.") from exc + task.result() + # NOTE: This will be thrown if task exits normally (which it should not) raise AsyncEngineDeadError(msg) - except Exception as exc: - request_tracker.propagate_exception(exc) - raise exc + except Exception as e: + exception = e + logger.error("Engine background task failed", exc_info=e) + error_callback(exception) + raise AsyncEngineDeadError( + msg + " See stack trace above for the actual cause.") from e class AsyncStream: @@ -78,13 +82,13 @@ def __init__(self) -> None: self._finished_requests: asyncio.Queue[str] = asyncio.Queue() self._new_requests: asyncio.Queue[Tuple[AsyncStream, dict]] = asyncio.Queue() - self.new_requests_event = None + self.new_requests_event = asyncio.Event() def __contains__(self, item): return item in self._request_streams - def init_event(self): - self.new_requests_event = asyncio.Event() + def __len__(self) -> int: + return len(self._request_streams) def propagate_exception(self, exc: Exception, @@ -93,9 +97,11 @@ def propagate_exception(self, (all if request_id is None).""" if request_id is not None: self._request_streams[request_id].put(exc) + self.abort_request(request_id) else: - for stream in self._request_streams.values(): + for rid, stream in self._request_streams.items(): stream.put(exc) + self.abort_request(rid) def process_request_output(self, request_output: RequestOutput, @@ -172,12 +178,15 @@ def get_new_and_finished_requests(self) -> Tuple[List[Dict], Set[str]]: self._request_streams[stream.request_id] = stream new_requests.append(new_request) - self.new_requests_event.clear() - return new_requests, finished_requests async def wait_for_new_requests(self): - await self.new_requests_event.wait() + if not self.has_new_requests(): + await self.new_requests_event.wait() + self.new_requests_event.clear() + + def has_new_requests(self): + return not self._new_requests.empty() class _AsyncLLMEngine(LLMEngine): @@ -285,6 +294,10 @@ async def _run_workers_async( all_outputs = await asyncio.gather(*coros) return all_outputs + async def check_health_async(self): + """Raises an error if engine is unhealthy.""" + self._check_if_any_actor_is_dead() + class AsyncLLMEngine: """An asynchronous wrapper for LLMEngine. 
@@ -335,27 +348,48 @@ def __init__(self, # collected self._background_loop_unshielded = None self.start_engine_loop = start_engine_loop - self._request_tracker = RequestTracker() + self._request_tracker: Optional[RequestTracker] = None + self._errored_with: Optional[BaseException] = None @property def is_running(self) -> bool: return (self.background_loop is not None - and not self.background_loop.done()) + and not self._background_loop_unshielded.done()) + + @property + def is_stopped(self) -> bool: + return self.errored or (self.background_loop is not None + and self._background_loop_unshielded.done()) + + @property + def errored(self) -> bool: + return self._errored_with is not None + + def set_errored(self, exc: Exception) -> None: + self._errored_with = exc + + def _error_callback(self, exc: Exception) -> None: + self.set_errored(exc) + self._request_tracker.propagate_exception(exc) def get_tokenizer(self): return self.engine.tokenizer.tokenizer def start_background_loop(self) -> None: """Start the background loop.""" + if self.errored: + raise AsyncEngineDeadError( + "Background loop has errored already.") from self._errored_with if self.is_running: raise RuntimeError("Background loop is already running.") - self._request_tracker.init_event() + # Initialize the RequestTracker here so it uses the right event loop. + self._request_tracker = RequestTracker() self._background_loop_unshielded = asyncio.get_event_loop( ).create_task(self.run_engine_loop()) self._background_loop_unshielded.add_done_callback( partial(_raise_exception_on_finish, - request_tracker=self._request_tracker)) + error_callback=self._error_callback)) self.background_loop = asyncio.shield(self._background_loop_unshielded) def _init_engine(self, *args, @@ -423,12 +457,23 @@ async def _engine_abort(self, request_ids: Iterable[str]): self.engine.abort_request(request_ids) async def run_engine_loop(self): - # Initialize the RequestTracker here so it uses the right event loop. has_requests_in_progress = False while True: if not has_requests_in_progress: + logger.debug("Waiting for new requests...") await self._request_tracker.wait_for_new_requests() - has_requests_in_progress = await self.engine_step() + logger.debug("Got new requests!") + + # Abort if iteration takes too long due to unrecoverable errors + # (eg. NCCL timeouts). + try: + has_requests_in_progress = await asyncio.wait_for( + self.engine_step(), ENGINE_ITERATION_TIMEOUT_S) + except asyncio.TimeoutError as exc: + logger.error( + "Engine iteration timed out. 
This should never happen!") + self.set_errored(exc) + raise await asyncio.sleep(0) async def add_request( @@ -647,3 +692,19 @@ async def do_log_stats(self) -> None: await self.engine.do_log_stats.remote() else: self.engine.do_log_stats() + + async def check_health(self): + """Raises an error if engine is unhealthy.""" + t = time.perf_counter() + logger.debug("Starting health check...") + if self.is_stopped: + raise AsyncEngineDeadError("Background loop is stopped.") + + if self.engine_use_ray: + try: + await self.engine.check_health.remote() + except ray.exceptions.RayActorError as e: + raise RuntimeError("Engine is dead.") from e + else: + await self.engine.check_health_async() + logger.debug(f"Health check took {time.perf_counter()-t}s") diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 703756996b7f7..1f518cbf39b21 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1119,3 +1119,23 @@ def _compiled_ray_dag(self): for worker in self.workers ]) return forward_dag.experimental_compile() + + def check_health(self) -> None: + """Raises an error if engine is unhealthy.""" + self._check_if_any_actor_is_dead() + + def _check_if_any_actor_is_dead(self): + if not self.parallel_config.worker_use_ray: + return + + if not self.workers: + return + + dead_actors = [] + for actor in self.workers: + actor_state = ray.state.actors(actor._ray_actor_id.hex()) # pylint: disable=protected-access + if actor_state["State"] == "DEAD": + dead_actors.append(actor) + if dead_actors: + raise RuntimeError("At least one Worker is dead. " + f"Dead Workers: {dead_actors}. ") From 9a4548bae73a8831f668116d8a6e88491d933a4e Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Mon, 4 Mar 2024 18:51:56 -0500 Subject: [PATCH 055/196] Fix the openai benchmarking requests to work with latest OpenAI apis (#2992) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> --- benchmarks/backend_request_func.py | 70 ++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index e7f74e2feaf86..d7cac22ce7a99 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -275,10 +275,80 @@ async def async_request_openai_completions( return output +async def async_request_openai_chat_completions( + request_func_input: RequestFuncInput, + pbar: Optional[tqdm] = None, +) -> RequestFuncOutput: + api_url = request_func_input.api_url + assert api_url.endswith( + "v1/chat/completions" + ), "OpenAI Chat API URL must end with 'v1/chat/completions'." 
+ + async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session: + assert not request_func_input.use_beam_search + payload = { + "model": request_func_input.model, + "messages": [ + { + "role": "user", + "content": request_func_input.prompt, + }, + ], + "temperature": 0.0, + "max_tokens": request_func_input.output_len, + "stream": True, + } + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}" + } + + output = RequestFuncOutput() + output.prompt_len = request_func_input.prompt_len + + generated_text = "" + ttft = 0 + st = time.perf_counter() + try: + async with session.post(url=api_url, json=payload, + headers=headers) as response: + if response.status == 200: + async for chunk in response.content: + if ttft == 0: + ttft = time.perf_counter() - st + output.ttft = ttft + + chunk = chunk.strip() + if not chunk: + continue + + chunk = chunk.decode("utf-8").lstrip("data: ") + if chunk == "[DONE]": + latency = time.perf_counter() - st + else: + body = json.loads(chunk) + if "content" in body["choices"][0]["delta"]: + generated_text += body["choices"][0]["delta"][ + "content"] + + output.generated_text = generated_text + output.success = True + output.latency = latency + else: + output.success = False + except (aiohttp.ClientOSError, aiohttp.ServerDisconnectedError): + output.success = False + + if pbar: + pbar.update(1) + return output + + ASYNC_REQUEST_FUNCS = { "tgi": async_request_tgi, "vllm": async_request_vllm, "deepspeed-mii": async_request_deepspeed_mii, "openai": async_request_openai_completions, + "openai-chat": async_request_openai_chat_completions, "tensorrt-llm": async_request_trt_llm, } From 05af6da8d927f70d15ab1ed25b01df3c967ad961 Mon Sep 17 00:00:00 2001 From: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com> Date: Mon, 4 Mar 2024 21:14:53 -0500 Subject: [PATCH 056/196] [ROCm] enable cupy in order to enable cudagraph mode for AMD GPUs (#3123) Co-authored-by: lcskrishna --- Dockerfile.rocm | 30 +++++++++++++++++++++++++----- vllm/worker/worker.py | 4 +--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 54ae06be6e101..a45265d79a6ac 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -23,6 +23,9 @@ RUN echo "FA_BRANCH is $FA_BRANCH" # In that case, we need to use the python reference attention implementation in vllm ARG BUILD_FA="1" +# whether to build cupy on rocm +ARG BUILD_CUPY="1" + # Install some basic utilities RUN apt-get update && apt-get install python3 python3-pip -y @@ -70,16 +73,33 @@ RUN if [ "$BUILD_FA" = "1" ]; then \ && cd ..; \ fi -COPY ./ /app/vllm - -RUN python3 -m pip install --upgrade pip -RUN python3 -m pip install xformers==0.0.23 --no-deps - # Error related to odd state for numpy 1.20.3 where there is no METADATA etc, but an extra LICENSES_bundled.txt. 
# Manually removed it so that later steps of numpy upgrade can continue RUN if [ "$BASE_IMAGE" = "rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1" ]; then \ rm -rf /opt/conda/envs/py_3.9/lib/python3.9/site-packages/numpy-1.20.3.dist-info/; fi +# build cupy +RUN if [ "$BUILD_CUPY" = "1" ]; then \ + mkdir -p libs \ + && cd libs \ + && git clone -b hipgraph_enablement --recursive https://github.com/ROCm/cupy.git \ + && cd cupy \ + && pip install mpi4py-mpich \ + && pip install scipy==1.9.3 \ + && pip install cython==0.29.* \ + && env CC=$MPI_HOME/bin/mpicc python -m pip install mpi4py \ + && export CUPY_INSTALL_USE_HIP=1 \ + && export ROCM_HOME=/opt/rocm \ + && export HCC_AMDGPU_TARGET="gfx90a,gfx942,gfx1100" \ + && pip install . \ + && cd ..; \ + fi + +COPY ./ /app/vllm + +RUN python3 -m pip install --upgrade pip +RUN python3 -m pip install xformers==0.0.23 --no-deps + RUN cd /app \ && cd vllm \ && pip install -U -r requirements-rocm.txt \ diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 9df518d155ec2..157e8c45836b1 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -19,7 +19,6 @@ from vllm.worker.cache_engine import CacheEngine from vllm.worker.model_runner import ModelRunner from vllm.lora.request import LoRARequest -from vllm.utils import is_hip class Worker: @@ -267,8 +266,7 @@ def init_distributed_environment( "cupy.distributed is already initialized but the cupy world " "size does not match parallel_config.world_size " f"({cupy_world_size} vs. {parallel_config.world_size}).") - elif (parallel_config.world_size > 1 and cupy_port is not None - and not is_hip()): + elif (parallel_config.world_size > 1 and cupy_port is not None): # NOTE(woosuk): We don't initialize CuPy process group when world size # is 1. # TODO(woosuk): Support multi-node connection. 
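With the `is_hip()` guard removed above, CuPy-backed CUDA-graph mode becomes available on ROCm whenever the image is built with CuPy. Since `BUILD_CUPY` defaults to "1" in Dockerfile.rocm, a plain image build already compiles it; the commands below are a minimal sketch of toggling that flag (the image tag is illustrative and the build is assumed to run from the repository root, neither is taken from this patch):

    # Default build: CuPy for ROCm is compiled (BUILD_CUPY=1 is the Dockerfile default).
    docker build -f Dockerfile.rocm -t vllm-rocm .

    # Opt out of the CuPy build, e.g. when CUDA-graph mode is not needed.
    docker build -f Dockerfile.rocm --build-arg BUILD_CUPY=0 -t vllm-rocm .
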
From 8999ec3c1632c91c194ab27df6bf274f5bcb0b5f Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 5 Mar 2024 15:35:43 -0800 Subject: [PATCH 057/196] Store `eos_token_id` in `Sequence` for easy access (#3166) --- tests/test_cache_block_hashing.py | 3 +- vllm/core/scheduler.py | 7 ++--- vllm/engine/llm_engine.py | 30 +++++++++----------- vllm/model_executor/layers/sampler.py | 1 - vllm/outputs.py | 41 ++++++++++++++------------- vllm/sequence.py | 11 ++++--- 6 files changed, 44 insertions(+), 49 deletions(-) diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py index 7c4ade7f8c8ed..c2067e52b59c0 100644 --- a/tests/test_cache_block_hashing.py +++ b/tests/test_cache_block_hashing.py @@ -54,7 +54,8 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int): for prompt in prompts: hashes[-1].append([]) prompt_token_ids = tokenizer.encode(prompt) - seq = Sequence(seq_id, prompt, prompt_token_ids, block_size) + seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, + tokenizer.tokenizer.eos_token_id) num_blocks = len(prompt_token_ids) // block_size for idx in range(num_blocks): diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 1ae58f525b0fb..c96c6d62ef19d 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -59,10 +59,9 @@ def is_empty(self) -> bool: and not self.blocks_to_swap_out and not self.blocks_to_copy) def _sort_by_lora_ids(self) -> bool: - self.scheduled_seq_groups = sorted( - self.scheduled_seq_groups, - key=lambda g: (g.lora_request.lora_int_id - if g.lora_request else 0, g.request_id)) + self.scheduled_seq_groups = sorted(self.scheduled_seq_groups, + key=lambda g: + (g.lora_int_id, g.request_id)) @property def lora_requests(self) -> Set[LoRARequest]: diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 1f518cbf39b21..52dc96e2b82e1 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -491,8 +491,10 @@ def add_request( # Create the sequences. block_size = self.cache_config.block_size seq_id = next(self.seq_counter) + eos_token_id = self.tokenizer.get_lora_tokenizer( + lora_request).eos_token_id seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, - lora_request) + eos_token_id, lora_request) # Defensive copy of SamplingParams, which are used by the sampler, # this doesn't deep-copy LogitsProcessor objects @@ -548,15 +550,13 @@ def _check_beam_search_early_stopping( if early_stopping is True: return True - current_worst_score = (current_worst_seq.get_beam_search_score( + current_worst_score = current_worst_seq.get_beam_search_score( length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq( - current_worst_seq).eos_token_id)) + eos_token_id=current_worst_seq.eos_token_id) if early_stopping is False: - highest_attainable_score = (best_running_seq.get_beam_search_score( + highest_attainable_score = best_running_seq.get_beam_search_score( length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq( - best_running_seq).eos_token_id)) + eos_token_id=best_running_seq.eos_token_id) else: assert early_stopping == "never" if length_penalty > 0.0: @@ -570,8 +570,7 @@ def _check_beam_search_early_stopping( highest_attainable_score = ( best_running_seq.get_beam_search_score( length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq( - best_running_seq).eos_token_id, + eos_token_id=best_running_seq.eos_token_id, seq_len=max_possible_length)) else: # Otherwise, beam search will prefer shorter sequences. 
The @@ -580,8 +579,7 @@ def _check_beam_search_early_stopping( highest_attainable_score = ( best_running_seq.get_beam_search_score( length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq( - best_running_seq).eos_token_id)) + eos_token_id=best_running_seq.eos_token_id)) return current_worst_score >= highest_attainable_score def _process_sequence_group_outputs(self, seq_group: SequenceGroup, @@ -679,8 +677,7 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup, all_finished_seqs = existing_finished_seqs + new_finished_seqs # Sort the finished sequences by their scores. all_finished_seqs.sort(key=lambda x: x[0].get_beam_search_score( - length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq(x[0]).eos_token_id), + length_penalty=length_penalty, eos_token_id=x[0].eos_token_id), reverse=True) for seq, parent, is_new in all_finished_seqs[:beam_width]: if is_new: @@ -707,8 +704,7 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup, if not seq.is_finished()] # Sort the running sequences by their scores. running_child_seqs.sort(key=lambda x: x[0].get_beam_search_score( - length_penalty=length_penalty, - eos_token_id=self.get_tokenizer_for_seq(x[0]).eos_token_id), + length_penalty=length_penalty, eos_token_id=x[0].eos_token_id), reverse=True) # Check if we can stop the beam search. @@ -1014,8 +1010,8 @@ def _check_stop(self, seq: Sequence, return # Check if the sequence has generated the EOS token. - if ((not sampling_params.ignore_eos) and seq.get_last_token_id() - == self.get_tokenizer_for_seq(seq).eos_token_id): + if ((not sampling_params.ignore_eos) + and seq.get_last_token_id() == seq.eos_token_id): seq.status = SequenceStatus.FINISHED_STOPPED return diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index b48dde0318d09..320cb443524ca 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -516,7 +516,6 @@ def _get_logprobs( if (i < sampling_metadata.num_prompts and sampling_params.prompt_logprobs is not None): num_logprobs = sampling_params.prompt_logprobs - prompt_len = sampling_metadata.prompt_lens[i] prompt_tokens = sampling_metadata.seq_data[ seq_ids[0]].prompt_token_ids group_prompt_logprobs: PromptLogprobs = [None] diff --git a/vllm/outputs.py b/vllm/outputs.py index a6de2a5a2257b..4f9eddee11cd4 100644 --- a/vllm/outputs.py +++ b/vllm/outputs.py @@ -90,29 +90,30 @@ def from_seq_group(cls, seq_group: SequenceGroup) -> "RequestOutput": # Get the top-n sequences. n = seq_group.sampling_params.n seqs = seq_group.get_seqs() - if seq_group.sampling_params.use_beam_search: - sorting_key = lambda seq: seq.get_beam_search_score( - seq_group.sampling_params.length_penalty) + if n == 1: + top_n_seqs = seqs else: - sorting_key = lambda seq: seq.get_cumulative_logprob() - sorted_seqs = sorted(seqs, key=sorting_key, reverse=True) - top_n_seqs = sorted_seqs[:n] + if seq_group.sampling_params.use_beam_search: + sorting_key = lambda seq: seq.get_beam_search_score( + seq_group.sampling_params.length_penalty) + else: + sorting_key = lambda seq: seq.get_cumulative_logprob() + sorted_seqs = sorted(seqs, key=sorting_key, reverse=True) + top_n_seqs = sorted_seqs[:n] # Create the outputs. 
- outputs: List[CompletionOutput] = [] - for seq in top_n_seqs: - logprobs = seq.output_logprobs - if seq_group.sampling_params.logprobs is None: - # NOTE: We need to take care of this case because the sequence - # always has the logprobs of the sampled tokens even if the - # logprobs are not requested. - logprobs = None - finshed_reason = SequenceStatus.get_finished_reason(seq.status) - output = CompletionOutput(seqs.index(seq), seq.output_text, - seq.get_output_token_ids(), - seq.get_cumulative_logprob(), logprobs, - finshed_reason) - outputs.append(output) + # NOTE: We need omit logprobs here explicitly because the sequence + # always has the logprobs of the sampled tokens even if the + # logprobs are not requested. + include_logprobs = seq_group.sampling_params.logprobs + outputs = [ + CompletionOutput(seqs.index(seq), seq.output_text, + seq.get_output_token_ids(), + seq.get_cumulative_logprob(), + seq.output_logprobs if include_logprobs else None, + SequenceStatus.get_finished_reason(seq.status)) + for seq in top_n_seqs + ] # Every sequence in the sequence group should have the same prompt. prompt = seq_group.prompt diff --git a/vllm/sequence.py b/vllm/sequence.py index a110ab6b748f8..97b72fdc4cbeb 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -142,11 +142,13 @@ def __init__( prompt: str, prompt_token_ids: List[int], block_size: int, + eos_token_id: int, lora_request: Optional[LoRARequest] = None, ) -> None: self.seq_id = seq_id self.prompt = prompt self.block_size = block_size + self.eos_token_id = eos_token_id self.lora_request = lora_request self.data = SequenceData(prompt_token_ids) @@ -362,12 +364,9 @@ def get_seqs( self, status: Optional[SequenceStatus] = None, ) -> List[Sequence]: - if status is None: - return list(self.seqs_dict.values()) - else: - return [ - seq for seq in self.seqs_dict.values() if seq.status == status - ] + return list(self.seqs_dict.values()) if status is None else [ + seq for seq in self.seqs_dict.values() if seq.status == status + ] def get_unfinished_seqs(self) -> List[Sequence]: return [ From 2efce05dc3c7c1e367617465f8f661a058499e37 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 5 Mar 2024 16:17:20 -0800 Subject: [PATCH 058/196] [Fix] Avoid pickling entire LLMEngine for Ray workers (#3207) Co-authored-by: Antoni Baum --- vllm/engine/llm_engine.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 52dc96e2b82e1..8484014c9a13f 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -158,6 +158,11 @@ def __init__( if USE_RAY_COMPILED_DAG: self.forward_dag = self._compiled_ray_dag() + def __reduce__(self): + # This is to ensure that the LLMEngine is not referenced in + # the closure used to initialize Ray worker actors + raise RuntimeError("LLMEngine should not be pickled!") + def get_tokenizer_for_seq(self, sequence: Sequence): return self.tokenizer.get_lora_tokenizer(sequence.lora_request) @@ -280,6 +285,8 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", parallel_config = copy.deepcopy(self.parallel_config) scheduler_config = copy.deepcopy(self.scheduler_config) device_config = copy.deepcopy(self.device_config) + lora_config = copy.deepcopy(self.lora_config) + kv_cache_dtype = self.cache_config.cache_dtype for rank, (worker, (node_id, _)) in enumerate(zip(self.workers, @@ -295,22 +302,22 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", local_rank, rank, distributed_init_method, - 
lora_config=self.lora_config, - kv_cache_dtype=self.cache_config.cache_dtype, + lora_config=lora_config, + kv_cache_dtype=kv_cache_dtype, )) driver_rank = 0 driver_local_rank = node_workers[driver_node_id].index(driver_rank) self.driver_worker = Worker( - model_config, - parallel_config, - scheduler_config, - device_config, + self.model_config, + self.parallel_config, + self.scheduler_config, + self.device_config, driver_local_rank, driver_rank, distributed_init_method, lora_config=self.lora_config, - kv_cache_dtype=self.cache_config.cache_dtype, + kv_cache_dtype=kv_cache_dtype, is_driver_worker=True, ) From 24aecf421a4ad5989697010963074904fead9a1b Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Wed, 6 Mar 2024 11:23:34 +0900 Subject: [PATCH 059/196] [Tests] Add block manager and scheduler tests (#3108) --- .buildkite/test-pipeline.yaml | 3 + tests/core/__init__.py | 0 tests/core/test_block_manager.py | 262 +++++++++++++++++++++++++++++++ tests/core/test_scheduler.py | 170 ++++++++++++++++++++ tests/core/utils.py | 27 ++++ 5 files changed, 462 insertions(+) create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_block_manager.py create mode 100644 tests/core/test_scheduler.py create mode 100644 tests/core/utils.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c65ab04b8ddda..15f971b66e3bd 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -13,6 +13,9 @@ steps: - label: Basic Correctness Test command: pytest -v -s --forked basic_correctness + +- label: Core Test + command: pytest -v -s core - label: Distributed Comm Ops Test command: pytest -v -s --forked test_comm_ops.py diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py new file mode 100644 index 0000000000000..ecdf3025cffdf --- /dev/null +++ b/tests/core/test_block_manager.py @@ -0,0 +1,262 @@ +import pytest +import time +from typing import List + +from vllm import SamplingParams +from vllm.block import PhysicalTokenBlock +from vllm.core.block_manager import BlockAllocator, BlockSpaceManager, AllocStatus +from vllm.utils import Device +from vllm.sequence import Sequence, SequenceGroup, SequenceStatus + +from .utils import create_dummy_prompt + + +def test_block_allocator_allocate(): + block_size = 4 + num_cpu_blocks = 4 + cpu_allocator = BlockAllocator(Device.CPU, block_size, num_cpu_blocks) + + # Allocate all available cpu blocks. + num_free = num_cpu_blocks + assert cpu_allocator.get_num_free_blocks() == num_free + for _ in range(num_cpu_blocks): + block = cpu_allocator.allocate() + num_free -= 1 + assert block not in cpu_allocator.free_blocks + assert cpu_allocator.get_num_free_blocks() == num_free + + with pytest.raises(ValueError): + cpu_allocator.allocate() + + +def test_block_allocator_free(): + block_size = 4 + num_cpu_blocks = 4 + cpu_allocator = BlockAllocator(Device.CPU, block_size, num_cpu_blocks) + + # Allocate all available cpu blocks. + blocks: List[PhysicalTokenBlock] = [] + for _ in range(num_cpu_blocks): + block = cpu_allocator.allocate() + blocks.append(block) + assert block not in cpu_allocator.free_blocks + + # Free all allocated cpu blocks. 
+ num_free = 0 + assert cpu_allocator.get_num_free_blocks() == num_free + for block in blocks: + cpu_allocator.free(block) + num_free += 1 + assert block in cpu_allocator.free_blocks + assert cpu_allocator.get_num_free_blocks() == num_free + + with pytest.raises(ValueError): + cpu_allocator.free(block) + + +def test_allocate(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + # Allocate same sequence group to all available gpu blocks. + for i in range(num_gpu_blocks): + _, seq_group = create_dummy_prompt(str(i), block_size) + assert block_manager.can_allocate(seq_group) + block_manager.allocate(seq_group) + assert block_manager.can_allocate(seq_group) != AllocStatus.OK + + # Allocate same sequence group to all available gpu blocks. + # Use watermark to reserve one gpu block. + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=1 / num_gpu_blocks) + for i in range(num_gpu_blocks - 1): + _, seq_group = create_dummy_prompt(str(i), block_size) + assert block_manager.can_allocate(seq_group) + block_manager.allocate(seq_group) + assert block_manager.can_allocate(seq_group) != AllocStatus.OK + + +def test_append_slot_single_seq(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + # Allocate single seq to gpu block. + prompt, seq_group = create_dummy_prompt("1", block_size) + block_manager.allocate(seq_group) + + # Nothing to append. Sequence has no new logical blocks. + assert block_manager.can_append_slot(seq_group) + before_blocks = block_manager.get_num_free_gpu_blocks() + assert not block_manager.append_slot(prompt) + after_blocks = block_manager.get_num_free_gpu_blocks() + assert before_blocks == after_blocks + + # Add block_size number of new tokens and append slot. + for i in range(block_size): + token_id = i + 5 + prompt.append_token_id(token_id, {token_id: 0.0}) + + assert block_manager.can_append_slot(seq_group) + before_blocks = block_manager.get_num_free_gpu_blocks() + assert not block_manager.append_slot(prompt) + after_blocks = block_manager.get_num_free_gpu_blocks() + assert before_blocks - after_blocks == 1 + + +def test_append_slot_cow(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + # Allocate prompt to gpu block. + prompt = Sequence(1, "one two three", [1, 2, 3], block_size) + child = prompt.fork(2) + token_id = 4 + child.append_token_id(token_id, {token_id: 0.0}) + seq_group = SequenceGroup("1", [prompt, child], SamplingParams(), + time.time(), time.perf_counter) + block_manager.allocate(seq_group) + + # Append slot for child token. + # Last block being modified is shared. Copy on write occurs. 
+ assert block_manager.can_append_slot(seq_group) + before_blocks = block_manager.get_num_free_gpu_blocks() + src_block, dst_block = block_manager.append_slot(child) + assert src_block != dst_block + + after_blocks = block_manager.get_num_free_gpu_blocks() + assert before_blocks - after_blocks == 1 + + +def test_fork(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + prompt, seq_group = create_dummy_prompt("1", + block_size - 1, + block_size=block_size) + block_manager.allocate(seq_group) + + # Fork prompt and copy block tables. + child = prompt.fork(2) + block_manager.fork(prompt, child) + assert block_manager.get_block_table( + prompt) == block_manager.get_block_table(child) + token_id = 4 + # Append token to child. Block is shared so copy on write occurs. + child.append_token_id(token_id, {token_id: 0.0}) + block_manager.append_slot(child) + assert block_manager.get_block_table( + prompt) != block_manager.get_block_table(child) + + +def test_swap(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + prompt, seq_group = create_dummy_prompt("1", prompt_length=block_size - 1) + prompt.status = SequenceStatus.WAITING + block_manager.allocate(seq_group) + + # Emulate a forward pass by appending a single token. + # The block manager then knows how many unprocessed + # tokens will be written in the next forward pass. + token_id = 0 + prompt.status = SequenceStatus.RUNNING + prompt.append_token_id(token_id, {token_id: 0.0}) + + # Swap seq group from GPU -> CPU. + gpu_blocks = block_manager.get_block_table(prompt) + assert block_manager.can_swap_out(seq_group) + before_cpu_blocks = block_manager.get_num_free_cpu_blocks() + before_gpu_blocks = block_manager.get_num_free_gpu_blocks() + mapping = block_manager.swap_out(seq_group) + assert list(mapping.keys()) == gpu_blocks + after_cpu_blocks = block_manager.get_num_free_cpu_blocks() + after_gpu_blocks = block_manager.get_num_free_gpu_blocks() + assert before_cpu_blocks == after_cpu_blocks + len(gpu_blocks) + assert before_gpu_blocks + len(gpu_blocks) == after_gpu_blocks + prompt.status = SequenceStatus.SWAPPED + + # Swap seq group from CPU -> GPU. + cpu_blocks = block_manager.get_block_table(prompt) + assert block_manager.can_swap_in(seq_group) + before_cpu_blocks = block_manager.get_num_free_cpu_blocks() + before_gpu_blocks = block_manager.get_num_free_gpu_blocks() + mapping = block_manager.swap_in(seq_group) + assert list(mapping.keys()) == cpu_blocks + after_cpu_blocks = block_manager.get_num_free_cpu_blocks() + after_gpu_blocks = block_manager.get_num_free_gpu_blocks() + assert before_cpu_blocks + len(cpu_blocks) == after_cpu_blocks + assert before_gpu_blocks == after_gpu_blocks + len(cpu_blocks) + + +def test_free(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + prompt, seq_group = create_dummy_prompt("1", block_size) + block_manager.allocate(seq_group) + + # Free allocated seq. + prompt_blocks = len(block_manager.get_block_table(prompt)) + before_blocks = block_manager.get_num_free_gpu_blocks() + block_manager.free(prompt) + after_blocks = block_manager.get_num_free_gpu_blocks() + assert after_blocks == before_blocks + prompt_blocks + + # Block table for freed seq is deleted. 
+ with pytest.raises(KeyError): + block_manager.get_block_table(prompt) + + +def test_reset(): + block_size = 4 + num_cpu_blocks = 4 + num_gpu_blocks = 4 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + watermark=0) + + # Allocate same seq group on all available gpu blocks. + original_blocks = block_manager.get_num_free_gpu_blocks() + for i in range(num_gpu_blocks): + _, seq_group = create_dummy_prompt(str(i), block_size) + block_manager.allocate(seq_group) + assert block_manager.get_num_free_gpu_blocks() == 0 + + # Resetting block manager frees all allocated blocks. + block_manager.reset() + assert block_manager.get_num_free_gpu_blocks() == original_blocks diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py new file mode 100644 index 0000000000000..6322b2f2d5e9e --- /dev/null +++ b/tests/core/test_scheduler.py @@ -0,0 +1,170 @@ +from typing import List +import pytest # noqa + +from vllm.config import CacheConfig, SchedulerConfig +from vllm.core.scheduler import Scheduler +from vllm.sequence import SequenceGroup + +from .utils import create_dummy_prompt + + +def test_scheduler_add_seq_group(): + block_size = 4 + scheduler_config = SchedulerConfig(100, 64, 1, 256) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 4 + cache_config.num_gpu_blocks = 4 + scheduler = Scheduler(scheduler_config, cache_config, None) + + # Add seq group to scheduler. + num_seq_group = 4 + for i in range(num_seq_group): + _, seq_group = create_dummy_prompt(str(i), block_size) + scheduler.add_seq_group(seq_group) + assert scheduler.get_num_unfinished_seq_groups() == i + 1 + + +def test_scheduler_abort_seq_group(): + block_size = 4 + scheduler_config = SchedulerConfig(100, 64, 1, 256) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 4 + cache_config.num_gpu_blocks = 4 + scheduler = Scheduler(scheduler_config, cache_config, None) + + # Add multiple seq groups to scheduler. + num_seq_group = 4 + request_ids = set() + for i in range(num_seq_group): + _, seq_group = create_dummy_prompt(str(i), block_size) + scheduler.add_seq_group(seq_group) + request_ids.add(str(i)) + + # Abort all added seq groups. + assert scheduler.get_num_unfinished_seq_groups() == num_seq_group + scheduler.abort_seq_group(request_ids) + assert scheduler.get_num_unfinished_seq_groups() == 0 + + +def test_scheduler_schedule_simple(): + block_size = 4 + num_seq_group = 4 + max_model_len = 16 + scheduler_config = SchedulerConfig(64, num_seq_group, max_model_len, 256) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 8 + cache_config.num_gpu_blocks = 8 + scheduler = Scheduler(scheduler_config, cache_config, None) + + # Add seq groups to scheduler. + running: List[SequenceGroup] = [] + for i in range(num_seq_group): + _, seq_group = create_dummy_prompt(str(i), prompt_length=block_size) + scheduler.add_seq_group(seq_group) + running.append(seq_group) + + # Schedule seq groups prompts. + seq_group_meta, out = scheduler.schedule() + assert set(out.scheduled_seq_groups) == set(running) + assert out.num_batched_tokens == num_seq_group * seq_group.get_seqs( + )[0].get_len() + assert (not out.blocks_to_copy and not out.blocks_to_swap_in + and not out.blocks_to_swap_out) + assert len(seq_group_meta) == num_seq_group + + # Schedule seq groups generation. 
+ seq_group_meta, out = scheduler.schedule() + assert set(out.scheduled_seq_groups) == set(running) + assert out.num_batched_tokens == num_seq_group + assert (not out.blocks_to_copy and not out.blocks_to_swap_in + and not out.blocks_to_swap_out) + assert len(seq_group_meta) == num_seq_group + + +def test_scheduler_schedule_preempt_abort(): + block_size = 4 + max_model_len = 16 + scheduler_config = SchedulerConfig(64, 2, max_model_len, 256) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 2 + cache_config.num_gpu_blocks = 2 + scheduler = Scheduler(scheduler_config, cache_config, None) + + # Add seq groups to scheduler. + seq_a, seq_group_a = create_dummy_prompt("1", block_size) + seq_b, seq_group_b = create_dummy_prompt("2", block_size) + scheduler.add_seq_group(seq_group_a) + scheduler.add_seq_group(seq_group_b) + + # Schedule seq groups prompts. + seq_group_meta, out = scheduler.schedule() + assert out.scheduled_seq_groups == [seq_group_a, seq_group_b] + assert out.num_batched_tokens == seq_group_a.get_seqs()[0].get_len() * 2 + assert (not out.blocks_to_copy and not out.blocks_to_swap_in + and not out.blocks_to_swap_out) + assert len(seq_group_meta) == 2 + assert scheduler.get_num_unfinished_seq_groups() == 2 + + # Append "generated" tokens, allowing the sequence to mark prompt tokens as + # processed. + token_id = 0 + seq_a.append_token_id(token_id, {token_id: 0.0}) + seq_b.append_token_id(token_id, {token_id: 0.0}) + + # Schedule seq groups generation and preempt seq group b. + seq_group_meta, out = scheduler.schedule() + assert out.scheduled_seq_groups == [seq_group_a] + assert out.num_batched_tokens == 1 + assert (not out.blocks_to_copy and not out.blocks_to_swap_in + and not out.blocks_to_swap_out) + assert len(seq_group_meta) == 1 + assert scheduler.get_num_unfinished_seq_groups() == 2 + + # Abort seq group a. Re-schedule seq group b prompt with recomputation. + scheduler.abort_seq_group("1") + seq_group_meta, out = scheduler.schedule() + assert out.scheduled_seq_groups == [seq_group_b] + assert out.num_batched_tokens == seq_group_b.get_seqs()[0].get_len() + assert (not out.blocks_to_copy and not out.blocks_to_swap_in + and not out.blocks_to_swap_out) + assert len(seq_group_meta) == 1 + assert scheduler.get_num_unfinished_seq_groups() == 1 + + +def test_scheduler_max_seqs(): + block_size = 4 + num_seq_group = 4 + max_seq_group = 2 + max_model_len = 16 + scheduler_config = SchedulerConfig(64, max_seq_group, max_model_len, 256) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 8 + cache_config.num_gpu_blocks = 8 + scheduler = Scheduler(scheduler_config, cache_config, None) + + all_seq_groups: List[SequenceGroup] = [] + # Add seq groups to scheduler. + for i in range(num_seq_group): + _, seq_group = create_dummy_prompt(str(i), prompt_length=block_size) + all_seq_groups.append(seq_group) + + # Append 1 seq group + scheduler.add_seq_group(all_seq_groups[0]) + + # Schedule seq groups prompts. + _, out = scheduler.schedule() + assert set(out.scheduled_seq_groups) == set([all_seq_groups[0]]) + + # Schedule seq groups generation. + _, out = scheduler.schedule() + assert set(out.scheduled_seq_groups) == set([all_seq_groups[0]]) + + # Append 2 more seq group + scheduler.add_seq_group(all_seq_groups[1]) + scheduler.add_seq_group(all_seq_groups[2]) + + # Schedule seq groups prompts. + # Only 1 seq group should be scheduled since max_seq_group is 2 + # and one is prompting. 
+ _, out = scheduler.schedule() + assert set(out.scheduled_seq_groups) == set([all_seq_groups[1]]) diff --git a/tests/core/utils.py b/tests/core/utils.py new file mode 100644 index 0000000000000..9c0cfe1a7cf66 --- /dev/null +++ b/tests/core/utils.py @@ -0,0 +1,27 @@ +import time +from typing import Tuple + +from vllm import SamplingParams +from vllm.sequence import Sequence, SequenceGroup + + +def create_dummy_prompt( + request_id: str, + prompt_length: int, + block_size: int = None) -> Tuple[Sequence, SequenceGroup]: + if not block_size: + block_size = prompt_length + + # Create dummy prompt sequence with tokens 0...block_size-1 + # and prompt "0 ... block_size". + prompt_tokens = list(range(prompt_length)) + prompt_str = " ".join([str(t) for t in prompt_tokens]) + prompt = Sequence(int(request_id), prompt_str, prompt_tokens, block_size) + seq_group = SequenceGroup(request_id, [prompt], SamplingParams(), + time.time(), None, None) + + return prompt, seq_group + + +def round_up_to_next_block(seq_len: int, block_size: int) -> int: + return (seq_len + block_size - 1) // block_size From a33ce60c6629e8c22aaf002ae8478a685e726e3e Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Wed, 6 Mar 2024 01:04:23 -0800 Subject: [PATCH 060/196] [Testing] Fix core tests (#3224) --- tests/core/test_block_manager.py | 49 ++++++++++++++++++++------------ tests/core/test_scheduler.py | 6 ++-- tests/core/utils.py | 2 +- vllm/sequence.py | 2 +- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py index ecdf3025cffdf..04d01f7724e4f 100644 --- a/tests/core/test_block_manager.py +++ b/tests/core/test_block_manager.py @@ -6,7 +6,7 @@ from vllm.block import PhysicalTokenBlock from vllm.core.block_manager import BlockAllocator, BlockSpaceManager, AllocStatus from vllm.utils import Device -from vllm.sequence import Sequence, SequenceGroup, SequenceStatus +from vllm.sequence import Sequence, SequenceGroup, SequenceStatus, Logprob from .utils import create_dummy_prompt @@ -22,7 +22,8 @@ def test_block_allocator_allocate(): for _ in range(num_cpu_blocks): block = cpu_allocator.allocate() num_free -= 1 - assert block not in cpu_allocator.free_blocks + + assert block.block_hash not in cpu_allocator.evictor assert cpu_allocator.get_num_free_blocks() == num_free with pytest.raises(ValueError): @@ -39,7 +40,7 @@ def test_block_allocator_free(): for _ in range(num_cpu_blocks): block = cpu_allocator.allocate() blocks.append(block) - assert block not in cpu_allocator.free_blocks + assert block.block_hash not in cpu_allocator.evictor # Free all allocated cpu blocks. num_free = 0 @@ -47,7 +48,7 @@ def test_block_allocator_free(): for block in blocks: cpu_allocator.free(block) num_free += 1 - assert block in cpu_allocator.free_blocks + assert block.block_hash in cpu_allocator.evictor assert cpu_allocator.get_num_free_blocks() == num_free with pytest.raises(ValueError): @@ -106,7 +107,7 @@ def test_append_slot_single_seq(): # Add block_size number of new tokens and append slot. 
for i in range(block_size): token_id = i + 5 - prompt.append_token_id(token_id, {token_id: 0.0}) + prompt.append_token_id(token_id, {token_id: Logprob(0.0)}) assert block_manager.can_append_slot(seq_group) before_blocks = block_manager.get_num_free_gpu_blocks() @@ -119,25 +120,37 @@ def test_append_slot_cow(): block_size = 4 num_cpu_blocks = 4 num_gpu_blocks = 4 - block_manager = BlockSpaceManager(block_size, - num_cpu_blocks, - num_gpu_blocks, + block_manager = BlockSpaceManager(block_size=block_size, + num_cpu_blocks=num_cpu_blocks, + num_gpu_blocks=num_gpu_blocks, watermark=0) - # Allocate prompt to gpu block. - prompt = Sequence(1, "one two three", [1, 2, 3], block_size) - child = prompt.fork(2) - token_id = 4 - child.append_token_id(token_id, {token_id: 0.0}) + # Allocate prompt to gpu block. There is one slot left in the block. + prompt = Sequence(seq_id=1, + prompt="one two three", + prompt_token_ids=[1, 2, 3], + block_size=block_size) + + # Fork the sequence, such that a COW will be required when we append a new + # token id. + child = prompt.fork(new_seq_id=2) + + # Allocate space for the sequence group. seq_group = SequenceGroup("1", [prompt, child], SamplingParams(), time.time(), time.perf_counter) block_manager.allocate(seq_group) - # Append slot for child token. - # Last block being modified is shared. Copy on write occurs. + # Fork and append a new token id. We expect a COW to be scheduled. + token_id = 4 + child.append_token_id(token_id, {token_id: Logprob(0.0)}) + block_manager.fork(prompt, child) + assert block_manager.can_append_slot(seq_group) before_blocks = block_manager.get_num_free_gpu_blocks() - src_block, dst_block = block_manager.append_slot(child) + + maybe_src_dst_block = block_manager.append_slot(child) + assert maybe_src_dst_block is not None + src_block, dst_block = maybe_src_dst_block assert src_block != dst_block after_blocks = block_manager.get_num_free_gpu_blocks() @@ -165,7 +178,7 @@ def test_fork(): prompt) == block_manager.get_block_table(child) token_id = 4 # Append token to child. Block is shared so copy on write occurs. - child.append_token_id(token_id, {token_id: 0.0}) + child.append_token_id(token_id, {token_id: Logprob(0.0)}) block_manager.append_slot(child) assert block_manager.get_block_table( prompt) != block_manager.get_block_table(child) @@ -189,7 +202,7 @@ def test_swap(): # tokens will be written in the next forward pass. token_id = 0 prompt.status = SequenceStatus.RUNNING - prompt.append_token_id(token_id, {token_id: 0.0}) + prompt.append_token_id(token_id, {token_id: Logprob(0.0)}) # Swap seq group from GPU -> CPU. gpu_blocks = block_manager.get_block_table(prompt) diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 6322b2f2d5e9e..ebfeb8ba04812 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -3,7 +3,7 @@ from vllm.config import CacheConfig, SchedulerConfig from vllm.core.scheduler import Scheduler -from vllm.sequence import SequenceGroup +from vllm.sequence import SequenceGroup, Logprob from .utils import create_dummy_prompt @@ -108,8 +108,8 @@ def test_scheduler_schedule_preempt_abort(): # Append "generated" tokens, allowing the sequence to mark prompt tokens as # processed. token_id = 0 - seq_a.append_token_id(token_id, {token_id: 0.0}) - seq_b.append_token_id(token_id, {token_id: 0.0}) + seq_a.append_token_id(token_id, {token_id: Logprob(0.0)}) + seq_b.append_token_id(token_id, {token_id: Logprob(0.0)}) # Schedule seq groups generation and preempt seq group b. 
seq_group_meta, out = scheduler.schedule() diff --git a/tests/core/utils.py b/tests/core/utils.py index 9c0cfe1a7cf66..6469789e89386 100644 --- a/tests/core/utils.py +++ b/tests/core/utils.py @@ -18,7 +18,7 @@ def create_dummy_prompt( prompt_str = " ".join([str(t) for t in prompt_tokens]) prompt = Sequence(int(request_id), prompt_str, prompt_tokens, block_size) seq_group = SequenceGroup(request_id, [prompt], SamplingParams(), - time.time(), None, None) + time.time(), None) return prompt, seq_group diff --git a/vllm/sequence.py b/vllm/sequence.py index 97b72fdc4cbeb..19dafe3cb0fc9 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -142,7 +142,7 @@ def __init__( prompt: str, prompt_token_ids: List[int], block_size: int, - eos_token_id: int, + eos_token_id: Optional[int] = None, lora_request: Optional[LoRARequest] = None, ) -> None: self.seq_id = seq_id From 4cb3b924cdeb6b809f0a0311f9833253d9162699 Mon Sep 17 00:00:00 2001 From: Chujie Zheng Date: Wed, 6 Mar 2024 14:41:42 -0800 Subject: [PATCH 061/196] Add tqdm `dynamic_ncols=True` (#3242) --- vllm/entrypoints/llm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 62f1d172377f6..1f463bdaaedc3 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -191,7 +191,9 @@ def _run_engine(self, use_tqdm: bool) -> List[RequestOutput]: # Initialize tqdm. if use_tqdm: num_requests = self.llm_engine.get_num_unfinished_requests() - pbar = tqdm(total=num_requests, desc="Processed prompts") + pbar = tqdm(total=num_requests, + desc="Processed prompts", + dynamic_ncols=True) # Run the engine. outputs: List[RequestOutput] = [] while self.llm_engine.has_unfinished_requests(): From d3c04b6a39df016504c28ec3fc27ea58ca802a28 Mon Sep 17 00:00:00 2001 From: TechxGenus Date: Thu, 7 Mar 2024 08:19:14 +0800 Subject: [PATCH 062/196] Add GPTQ support for Gemma (#3200) --- vllm/model_executor/models/gemma.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 03948132d32c3..bf1f164ff700d 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -325,11 +325,17 @@ def load_weights(self, if shard_name not in name: continue name = name.replace(shard_name, param_name) + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue param = params_dict[name] weight_loader = param.weight_loader weight_loader(param, loaded_weight, shard_id) break else: + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue # GemmaRMSNorm is different from Llama's in that it multiplies # (1 + weight) to the output, instead of just weight. if "norm.weight" in name: From cbf4c05b156c8705c6bb1a94b9edc0a5b4d26e20 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Thu, 7 Mar 2024 03:39:28 -0500 Subject: [PATCH 063/196] Update requirements-dev.txt to include package for benchmarking scripts. 
(#3181) Co-authored-by: Zhuohan Li --- requirements-dev.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 55e102374fd73..dfcbfa4253f1c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -21,3 +21,6 @@ einops # required for MPT openai requests ray + +# Benchmarking +aiohttp From 2daf23ab0cf00da157b1255faddcf0a269283d36 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 7 Mar 2024 01:45:50 -0800 Subject: [PATCH 064/196] Separate attention backends (#3005) --- .gitignore | 3 + setup.py | 48 +++- tests/kernels/test_prefix_prefill.py | 2 +- vllm/__init__.py | 30 ++- .../layers/attention/__init__.py | 5 + .../layers/attention/attention.py | 59 +++++ .../backends}/__init__.py | 0 .../layers/attention/backends/flash_attn.py | 124 ++++++++++ .../backends/xformers.py} | 216 +++++------------- .../layers/attention/ops/__init__.py | 0 .../layers/attention/ops/paged_attn.py | 138 +++++++++++ .../ops}/prefix_prefill.py | 0 vllm/model_executor/models/baichuan.py | 13 +- vllm/model_executor/models/bloom.py | 10 +- vllm/model_executor/models/chatglm.py | 4 +- vllm/model_executor/models/deepseek.py | 10 +- vllm/model_executor/models/falcon.py | 28 +-- vllm/model_executor/models/gemma.py | 10 +- vllm/model_executor/models/gpt2.py | 6 +- vllm/model_executor/models/gpt_bigcode.py | 10 +- vllm/model_executor/models/gpt_j.py | 4 +- vllm/model_executor/models/gpt_neox.py | 4 +- vllm/model_executor/models/internlm2.py | 10 +- vllm/model_executor/models/llama.py | 12 +- vllm/model_executor/models/mixtral.py | 4 +- vllm/model_executor/models/mixtral_quant.py | 4 +- vllm/model_executor/models/mpt.py | 12 +- vllm/model_executor/models/olmo.py | 8 +- vllm/model_executor/models/opt.py | 8 +- vllm/model_executor/models/orion.py | 10 +- vllm/model_executor/models/phi.py | 4 +- vllm/model_executor/models/qwen.py | 4 +- vllm/model_executor/models/qwen2.py | 12 +- vllm/model_executor/models/stablelm.py | 10 +- vllm/model_executor/models/starcoder2.py | 4 +- 35 files changed, 558 insertions(+), 268 deletions(-) create mode 100644 vllm/model_executor/layers/attention/__init__.py create mode 100644 vllm/model_executor/layers/attention/attention.py rename vllm/model_executor/layers/{triton_kernel => attention/backends}/__init__.py (100%) create mode 100644 vllm/model_executor/layers/attention/backends/flash_attn.py rename vllm/model_executor/layers/{attention.py => attention/backends/xformers.py} (56%) create mode 100644 vllm/model_executor/layers/attention/ops/__init__.py create mode 100644 vllm/model_executor/layers/attention/ops/paged_attn.py rename vllm/model_executor/layers/{triton_kernel => attention/ops}/prefix_prefill.py (100%) diff --git a/.gitignore b/.gitignore index b5195629e5cf3..0b14c98270c41 100644 --- a/.gitignore +++ b/.gitignore @@ -184,3 +184,6 @@ _build/ # Benchmark dataset *.json + +# Third-party Python packages. +vllm/thirdparty_files/ diff --git a/setup.py b/setup.py index 745b5a9b2d02a..57d7a139e8237 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,7 @@ import os import re import subprocess +import sys import warnings from pathlib import Path from typing import List, Set @@ -14,6 +15,8 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, ROCM_HOME ROOT_DIR = os.path.dirname(__file__) +# This is a temporary directory to store third-party packages. 
+THIRDPARTY_SUBDIR = "vllm/thirdparty_files" # If you are developing the C++ backend of vLLM, consider building vLLM with # `python setup.py develop` since it will give you incremental builds. @@ -324,8 +327,46 @@ def get_torch_arch_list() -> Set[str]: "nvcc": NVCC_FLAGS_PUNICA, }, )) -elif _is_neuron(): - neuronxcc_version = get_neuronxcc_version() + + # Download the FlashAttention package. + # Adapted from https://github.com/ray-project/ray/blob/f92928c9cfcbbf80c3a8534ca4911de1b44069c0/python/setup.py#L518-L530 + flash_attn_version = "2.5.6" + install_dir = os.path.join(ROOT_DIR, THIRDPARTY_SUBDIR) + subprocess.check_call( + [ + sys.executable, + "-m", + "pip", + "install", + "-q", + f"--target={install_dir}", + "einops", # Dependency of flash-attn. + f"flash-attn=={flash_attn_version}", + "--no-dependencies", # Required to avoid re-installing torch. + ], + env=dict(os.environ, CC="gcc"), + ) + + # Copy the FlashAttention package into the vLLM package after build. + class build_ext(BuildExtension): + + def run(self): + super().run() + target_dir = os.path.join(self.build_lib, THIRDPARTY_SUBDIR) + if not os.path.exists(target_dir): + os.makedirs(target_dir) + self.copy_tree(install_dir, target_dir) + + class BinaryDistribution(setuptools.Distribution): + + def has_ext_modules(self): + return True + +else: + build_ext = BuildExtension + BinaryDistribution = setuptools.Distribution + if _is_neuron(): + neuronxcc_version = get_neuronxcc_version() vllm_extension_sources = [ "csrc/cache_kernels.cu", @@ -468,6 +509,7 @@ def get_requirements() -> List[str]: python_requires=">=3.8", install_requires=get_requirements(), ext_modules=ext_modules, - cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {}, + cmdclass={"build_ext": build_ext} if not _is_neuron() else {}, + distclass=BinaryDistribution, package_data=package_data, ) diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index c068b38a66910..e881cd1ec3753 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -3,7 +3,7 @@ import time import torch -from vllm.model_executor.layers.triton_kernel.prefix_prefill import ( +from vllm.model_executor.layers.attention.ops.prefix_prefill import ( context_attention_fwd) from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalCausalFromBottomRightMask diff --git a/vllm/__init__.py b/vllm/__init__.py index f1e30f5eb6e6e..59f1345b58d42 100644 --- a/vllm/__init__.py +++ b/vllm/__init__.py @@ -1,12 +1,28 @@ """vLLM: a high-throughput and memory-efficient inference engine for LLMs""" -from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.engine.llm_engine import LLMEngine -from vllm.engine.ray_utils import initialize_cluster -from vllm.entrypoints.llm import LLM -from vllm.outputs import CompletionOutput, RequestOutput -from vllm.sampling_params import SamplingParams + +# Adapted from https://github.com/ray-project/ray/blob/f92928c9cfcbbf80c3a8534ca4911de1b44069c0/python/ray/__init__.py#L11 +def _configure_system(): + import os + import sys + + # Importing flash-attn. + thirdparty_files = os.path.join(os.path.abspath(os.path.dirname(__file__)), + "thirdparty_files") + sys.path.insert(0, thirdparty_files) + + +_configure_system() +# Delete configuration function. 
+del _configure_system + +from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402 +from vllm.engine.async_llm_engine import AsyncLLMEngine # noqa: E402 +from vllm.engine.llm_engine import LLMEngine # noqa: E402 +from vllm.engine.ray_utils import initialize_cluster # noqa: E402 +from vllm.entrypoints.llm import LLM # noqa: E402 +from vllm.outputs import CompletionOutput, RequestOutput # noqa: E402 +from vllm.sampling_params import SamplingParams # noqa: E402 __version__ = "0.3.3" diff --git a/vllm/model_executor/layers/attention/__init__.py b/vllm/model_executor/layers/attention/__init__.py new file mode 100644 index 0000000000000..1c42a3d28f976 --- /dev/null +++ b/vllm/model_executor/layers/attention/__init__.py @@ -0,0 +1,5 @@ +from vllm.model_executor.layers.attention.attention import Attention + +__all__ = [ + "Attention", +] diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py new file mode 100644 index 0000000000000..830e82e10f7ad --- /dev/null +++ b/vllm/model_executor/layers/attention/attention.py @@ -0,0 +1,59 @@ +"""Attention layer.""" +from typing import List, Optional + +import torch +import torch.nn as nn + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.utils import is_hip + + +class Attention(nn.Module): + """Attention layer. + + This class takes query, key, and value tensors as input. The input tensors + can either contain prompt tokens or generation tokens. + The class does the following: + + 1. Store the input key and value tensors in the KV cache. + 2. Perform (multi-head/multi-query/grouped-query) attention. + 3. Return the output tensor. + """ + + def __init__( + self, + num_heads: int, + head_size: int, + scale: float, + num_kv_heads: Optional[int] = None, + alibi_slopes: Optional[List[float]] = None, + sliding_window: Optional[int] = None, + ) -> None: + super().__init__() + if (not is_hip() and torch.cuda.get_device_capability()[0] >= 8 and + torch.get_default_dtype() in (torch.float16, torch.bfloat16)): + # Ampere or later NVIDIA GPUs. + # NOTE(woosuk): FlashAttention does not support FP32. + from vllm.model_executor.layers.attention.backends.flash_attn import FlashAttentionBackend + self.backend = FlashAttentionBackend(num_heads, head_size, scale, + num_kv_heads, alibi_slopes, + sliding_window) + else: + # Turing and Volta NVIDIA GPUs or AMD GPUs. + # Or FP32 on any GPU. 
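The constructor above is the only place a backend is chosen; the rule can be restated as a small standalone predicate (a simplified sketch of the same condition, not an extra API in the patch):

import torch

from vllm.utils import is_hip

def prefers_flash_attn() -> bool:
    # FlashAttention path: non-ROCm GPU, compute capability >= 8.0
    # (Ampere or newer), and a half-precision default dtype.
    return (not is_hip()
            and torch.cuda.get_device_capability()[0] >= 8
            and torch.get_default_dtype() in (torch.float16, torch.bfloat16))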
+ from vllm.model_executor.layers.attention.backends.xformers import XFormersBackend + self.backend = XFormersBackend(num_heads, head_size, scale, + num_kv_heads, alibi_slopes, + sliding_window) + + def forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + key_cache: Optional[torch.Tensor], + value_cache: Optional[torch.Tensor], + input_metadata: InputMetadata, + ) -> torch.Tensor: + return self.backend.forward(query, key, value, key_cache, value_cache, + input_metadata) diff --git a/vllm/model_executor/layers/triton_kernel/__init__.py b/vllm/model_executor/layers/attention/backends/__init__.py similarity index 100% rename from vllm/model_executor/layers/triton_kernel/__init__.py rename to vllm/model_executor/layers/attention/backends/__init__.py diff --git a/vllm/model_executor/layers/attention/backends/flash_attn.py b/vllm/model_executor/layers/attention/backends/flash_attn.py new file mode 100644 index 0000000000000..512f4e49c7eb2 --- /dev/null +++ b/vllm/model_executor/layers/attention/backends/flash_attn.py @@ -0,0 +1,124 @@ +"""Attention layer with Flash and PagedAttention.""" +from typing import List, Optional + +# NOTE(woosuk): This imports flash_attn under vllm/thirdparty_files/. +from flash_attn import flash_attn_func +import torch + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.attention.ops.paged_attn import ( + PagedAttentionImpl) + + +class FlashAttentionBackend: + + def __init__( + self, + num_heads: int, + head_size: int, + scale: float, + num_kv_heads: Optional[int] = None, + alibi_slopes: Optional[List[float]] = None, + sliding_window: Optional[int] = None, + ) -> None: + self.num_heads = num_heads + self.head_size = head_size + self.scale = float(scale) + self.num_kv_heads = num_heads if num_kv_heads is None else num_kv_heads + self.sliding_window = sliding_window + if alibi_slopes is not None: + alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) + self.alibi_slopes = alibi_slopes + + assert self.num_heads % self.num_kv_heads == 0 + self.num_queries_per_kv = self.num_heads // self.num_kv_heads + suppored_head_sizes = PagedAttentionImpl.get_supported_head_sizes() + if head_size not in suppored_head_sizes: + raise ValueError( + f"Head size {head_size} is not supported by PagedAttention. " + f"Supported head sizes are: {suppored_head_sizes}.") + + self.sliding_window = ((self.sliding_window, self.sliding_window) if + self.sliding_window is not None else (-1, -1)) + + def forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + key_cache: Optional[torch.Tensor], + value_cache: Optional[torch.Tensor], + input_metadata: InputMetadata, + ) -> torch.Tensor: + """Forward pass with FlashAttention and PagedAttention. + + Args: + query: shape = [batch_size, seq_len, num_heads * head_size] + key: shape = [batch_size, seq_len, num_kv_heads * head_size] + value: shape = [batch_size, seq_len, num_kv_heads * head_size] + key_cache: shape = [num_blocks, num_kv_heads, head_size/x, + block_size, x] + value_cache: shape = [num_blocks, num_kv_heads, head_size, + block_size] + input_metadata: metadata for the inputs. + Returns: + shape = [batch_size, seq_len, num_heads * head_size] + """ + batch_size, seq_len, hidden_size = query.shape + # Reshape the query, key, and value tensors. 
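The cache layouts in the docstring are easier to read with concrete numbers. Assuming fp16 caches and the 16-byte vectorized packing used by the CUDA cache kernels (so x = 16 / element size = 8), 8 KV heads, head_size 128 and block_size 16 give the shapes below:

import torch

num_blocks, num_kv_heads, head_size, block_size = 1024, 8, 128, 16
elem_bytes = torch.finfo(torch.float16).bits // 8      # 2 bytes for fp16
x = 16 // elem_bytes                                    # packing factor -> 8

key_cache = torch.empty(num_blocks, num_kv_heads, head_size // x, block_size, x,
                        dtype=torch.float16)
value_cache = torch.empty(num_blocks, num_kv_heads, head_size, block_size,
                          dtype=torch.float16)
print(tuple(key_cache.shape))    # (1024, 8, 16, 16, 8)
print(tuple(value_cache.shape))  # (1024, 8, 128, 16)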
+ query = query.view(-1, self.num_heads, self.head_size) + key = key.view(-1, self.num_kv_heads, self.head_size) + value = value.view(-1, self.num_kv_heads, self.head_size) + + # Reshape the keys and values and store them in the cache. + # If key_cache and value_cache are not provided, the new key and value + # vectors will not be cached. This happens during the initial memory + # profiling run. + if key_cache is not None and value_cache is not None: + PagedAttentionImpl.reshape_and_cache(key, value, key_cache, + value_cache, input_metadata) + + if input_metadata.is_prompt: + # Prompt run. + if (key_cache is None or value_cache is None + or input_metadata.block_tables.numel() == 0): + # normal attention + query = query.unflatten(0, (batch_size, seq_len)) + key = key.unflatten(0, (batch_size, seq_len)) + value = value.unflatten(0, (batch_size, seq_len)) + output = flash_attn_func( + query, + key, + value, + softmax_scale=self.scale, + causal=True, + window_size=self.sliding_window, + alibi_slopes=self.alibi_slopes, + ) + else: + # prefix-enabled attention + output = PagedAttentionImpl.forward_prefix( + query, + key, + value, + key_cache, + value_cache, + input_metadata, + self.num_heads, + self.num_kv_heads, + self.alibi_slopes, + ) + else: + # Decoding run. + output = PagedAttentionImpl.forward_decode( + query, + key_cache, + value_cache, + input_metadata, + self.num_kv_heads, + self.scale, + self.alibi_slopes, + ) + + # Reshape the output tensor. + return output.view(batch_size, seq_len, hidden_size) diff --git a/vllm/model_executor/layers/attention.py b/vllm/model_executor/layers/attention/backends/xformers.py similarity index 56% rename from vllm/model_executor/layers/attention.py rename to vllm/model_executor/layers/attention/backends/xformers.py index 2a82325b80213..bad2a648b6703 100644 --- a/vllm/model_executor/layers/attention.py +++ b/vllm/model_executor/layers/attention/backends/xformers.py @@ -1,37 +1,19 @@ -"""Multi-head attention.""" +"""Attention layer with xFormers and PagedAttention.""" +import importlib from typing import List, Optional -import importlib import torch -import torch.nn as nn from xformers import ops as xops from xformers.ops.fmha.attn_bias import (BlockDiagonalCausalMask, LowerTriangularMaskWithTensorBias) -from vllm._C import ops -from vllm._C import cache_ops from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.triton_kernel.prefix_prefill import ( - context_attention_fwd) +from vllm.model_executor.layers.attention.ops.paged_attn import ( + PagedAttentionImpl) from vllm.utils import is_hip -_SUPPORTED_HEAD_SIZES = [64, 80, 96, 112, 128, 256] -# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`. -_PARTITION_SIZE = 512 - - -class PagedAttention(nn.Module): - """MHA/MQA/GQA layer with PagedAttention. - This class takes query, key, and value tensors as input. The input tensors - can either contain prompt tokens or generation tokens. - The class does the following: - - 1. Reshape and store the input key and value tensors in the KV cache. - 2. Perform (multi-head/multi-query/grouped-query) attention using either - xformers or the PagedAttention custom op. - 3. Return the output tensor. 
- """ +class XFormersBackend: def __init__( self, @@ -42,7 +24,6 @@ def __init__( alibi_slopes: Optional[List[float]] = None, sliding_window: Optional[int] = None, ) -> None: - super().__init__() self.num_heads = num_heads self.head_size = head_size self.scale = float(scale) @@ -50,48 +31,17 @@ def __init__( self.sliding_window = sliding_window if alibi_slopes is not None: alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.register_buffer("alibi_slopes", alibi_slopes, persistent=False) + self.alibi_slopes = alibi_slopes assert self.num_heads % self.num_kv_heads == 0 self.num_queries_per_kv = self.num_heads // self.num_kv_heads + suppored_head_sizes = PagedAttentionImpl.get_supported_head_sizes() + if head_size not in suppored_head_sizes: + raise ValueError( + f"Head size {head_size} is not supported by PagedAttention. " + f"Supported head sizes are: {suppored_head_sizes}.") - if self.head_size not in _SUPPORTED_HEAD_SIZES: - raise ValueError(f"head_size ({self.head_size}) is not supported. " - f"Supported head sizes: {_SUPPORTED_HEAD_SIZES}.") - - self.use_ref_attention = self.check_use_ref_attention() - - def check_use_ref_attention(self) -> bool: - if not is_hip(): - return False - # For ROCm, check whether flash attention is installed or not. - # if not, use_ref_attention needs to be True - return importlib.util.find_spec("flash_attn") is None - - def ref_masked_attention( - self, - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - ) -> torch.Tensor: - query = query.view(-1, self.num_heads, self.head_size) - key = key.view(-1, self.num_kv_heads, self.head_size) - value = value.view(-1, self.num_kv_heads, self.head_size) - - seq_len, _, _ = query.shape - attn_mask = torch.triu(torch.ones(seq_len, - seq_len, - dtype=query.dtype, - device=query.device), - diagonal=1) - attn_mask = attn_mask * torch.finfo(query.dtype).min - - attn_weights = self.scale * torch.einsum("qhd,khd->hqk", query, - key).float() - attn_weights = attn_weights + attn_mask.float() - attn_weights = torch.softmax(attn_weights, dim=-1).to(value.dtype) - out = torch.einsum("hqk,khd->qhd", attn_weights, value) - return out + self.use_ref_attention = _check_use_ref_attention() def forward( self, @@ -102,7 +52,7 @@ def forward( value_cache: Optional[torch.Tensor], input_metadata: InputMetadata, ) -> torch.Tensor: - """PagedAttention forward pass. + """Forward pass with xFormers and PagedAttention. Args: query: shape = [batch_size, seq_len, num_heads * head_size] @@ -127,19 +77,14 @@ def forward( # vectors will not be cached. This happens during the initial memory # profiling run. if key_cache is not None and value_cache is not None: - cache_ops.reshape_and_cache( - key, - value, - key_cache, - value_cache, - input_metadata.slot_mapping.flatten(), - input_metadata.kv_cache_dtype, - ) + PagedAttentionImpl.reshape_and_cache(key, value, key_cache, + value_cache, input_metadata) if input_metadata.is_prompt: - # normal attention + # Prompt run. if (key_cache is None or value_cache is None or input_metadata.block_tables.numel() == 0): + # normal attention if self.num_kv_heads != self.num_heads: # As of Nov 2023, xformers only supports MHA. 
For MQA/GQA, # project the key and value tensors to the desired number of @@ -175,13 +120,19 @@ def forward( seq_len, query.dtype) if self.use_ref_attention: - output = self.ref_masked_attention( + output = _ref_masked_attention( query, key, value, + self.num_heads, + self.num_kv_heads, + self.head_size, + self.scale, ) - # Using view got RuntimeError: view size is not compatible with input tensor's size and stride - # (at least one dimension spans across two contiguous subspaces). Use reshape instead + # Using view got RuntimeError: view size is not compatible + # with input tensor's size and stride (at least one + # dimension spans across two contiguous subspaces). + # Use reshape instead. return output.reshape(batch_size, seq_len, hidden_size) # TODO(woosuk): Too many view operations. Let's try to reduce @@ -206,27 +157,21 @@ def forward( (is_hip()) else None, ) output = out.view_as(query) + else: # prefix-enabled attention - output = torch.empty_like(query) - context_attention_fwd( + output = PagedAttentionImpl.forward_prefix( query, key, value, - output, key_cache, value_cache, - input_metadata.block_tables, # [BS, max_block_per_request] - input_metadata.start_loc, - input_metadata.prompt_lens, - input_metadata.context_lens, - input_metadata.max_seq_len, - getattr(self, "alibi_slopes", None), + input_metadata, + self.alibi_slopes, ) - else: # Decoding run. - output = _paged_attention( + output = PagedAttentionImpl.forward_decode( query, key_cache, value_cache, @@ -274,76 +219,37 @@ def _make_alibi_bias( return attn_bias -def _paged_attention( +def _check_use_ref_attention() -> bool: + if not is_hip(): + return False + # For ROCm, check whether flash attention is installed or not. + # if not, use_ref_attention needs to be True + return importlib.util.find_spec("flash_attn") is None + + +def _ref_masked_attention( query: torch.Tensor, - key_cache: torch.Tensor, - value_cache: torch.Tensor, - input_metadata: InputMetadata, + key: torch.Tensor, + value: torch.Tensor, + num_heads: int, num_kv_heads: int, + head_size: int, scale: float, - alibi_slopes: Optional[torch.Tensor], ) -> torch.Tensor: - output = torch.empty_like(query) - - block_size = value_cache.shape[3] - num_seqs, num_heads, head_size = query.shape - max_num_partitions = ( - (input_metadata.max_context_len + _PARTITION_SIZE - 1) // - _PARTITION_SIZE) - # NOTE(woosuk): We use a simple heuristic to decide whether to use - # PagedAttention V1 or V2. If the number of partitions is 1, we use - # V1 to avoid the overhead of reduction. Also, if the number of - # sequences or heads is large, we use V1 since there is enough work - # to parallelize. - # TODO(woosuk): Tune this heuristic. - # For context len > 8192, use V2 kernel to avoid shared memory shortage. - use_v1 = input_metadata.max_context_len <= 8192 and ( - max_num_partitions == 1 or num_seqs * num_heads > 512) - if use_v1: - # Run PagedAttention V1. - ops.paged_attention_v1( - output, - query, - key_cache, - value_cache, - num_kv_heads, - scale, - input_metadata.block_tables, - input_metadata.context_lens, - block_size, - input_metadata.max_context_len, - alibi_slopes, - input_metadata.kv_cache_dtype, - ) - else: - # Run PagedAttention V2. 
- assert _PARTITION_SIZE % block_size == 0 - tmp_output = torch.empty( - size=(num_seqs, num_heads, max_num_partitions, head_size), - dtype=output.dtype, - device=output.device, - ) - exp_sums = torch.empty( - size=(num_seqs, num_heads, max_num_partitions), - dtype=torch.float32, - device=output.device, - ) - max_logits = torch.empty_like(exp_sums) - ops.paged_attention_v2( - output, - exp_sums, - max_logits, - tmp_output, - query, - key_cache, - value_cache, - num_kv_heads, - scale, - input_metadata.block_tables, - input_metadata.context_lens, - block_size, - input_metadata.max_context_len, - alibi_slopes, - input_metadata.kv_cache_dtype, - ) - return output + query = query.view(-1, num_heads, head_size) + key = key.view(-1, num_kv_heads, head_size) + value = value.view(-1, num_kv_heads, head_size) + + seq_len, _, _ = query.shape + attn_mask = torch.triu(torch.ones(seq_len, + seq_len, + dtype=query.dtype, + device=query.device), + diagonal=1) + attn_mask = attn_mask * torch.finfo(query.dtype).min + + attn_weights = scale * torch.einsum("qhd,khd->hqk", query, key).float() + attn_weights = attn_weights + attn_mask.float() + attn_weights = torch.softmax(attn_weights, dim=-1).to(value.dtype) + out = torch.einsum("hqk,khd->qhd", attn_weights, value) + return out diff --git a/vllm/model_executor/layers/attention/ops/__init__.py b/vllm/model_executor/layers/attention/ops/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/vllm/model_executor/layers/attention/ops/paged_attn.py b/vllm/model_executor/layers/attention/ops/paged_attn.py new file mode 100644 index 0000000000000..c5a9618c2395b --- /dev/null +++ b/vllm/model_executor/layers/attention/ops/paged_attn.py @@ -0,0 +1,138 @@ +from typing import List, Optional + +import torch + +from vllm._C import cache_ops +from vllm._C import ops +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.attention.ops.prefix_prefill import ( + context_attention_fwd) + +# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`. +_PARTITION_SIZE = 512 + + +class PagedAttentionImpl: + + @staticmethod + def get_supported_head_sizes() -> List[int]: + return [64, 80, 96, 112, 128, 256] + + @staticmethod + def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + input_metadata: InputMetadata, + ) -> None: + cache_ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + input_metadata.slot_mapping.flatten(), + input_metadata.kv_cache_dtype, + ) + + @staticmethod + def forward_decode( + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + input_metadata: InputMetadata, + num_kv_heads: int, + scale: float, + alibi_slopes: Optional[torch.Tensor], + ) -> torch.Tensor: + output = torch.empty_like(query) + + block_size = value_cache.shape[3] + num_seqs, num_heads, head_size = query.shape + max_num_partitions = ( + (input_metadata.max_context_len + _PARTITION_SIZE - 1) // + _PARTITION_SIZE) + # NOTE(woosuk): We use a simple heuristic to decide whether to use + # PagedAttention V1 or V2. If the number of partitions is 1, we use + # V1 to avoid the overhead of reduction. Also, if the number of + # sequences or heads is large, we use V1 since there is enough work + # to parallelize. + # TODO(woosuk): Tune this heuristic. + # For context len > 8192, use V2 kernel to avoid shared memory shortage. 
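The NOTE above is the whole V1/V2 decision; plugging in numbers makes it concrete. With max_context_len = 4096, 4 sequences and 32 heads, max_num_partitions = ceil(4096 / 512) = 8 and num_seqs * num_heads = 128, so neither branch of the V1 condition holds and the V2 reduction kernel runs. A standalone restatement (illustrative only):

_PARTITION_SIZE = 512

def paged_attention_version(max_context_len: int, num_seqs: int,
                            num_heads: int) -> int:
    max_num_partitions = ((max_context_len + _PARTITION_SIZE - 1)
                          // _PARTITION_SIZE)
    use_v1 = max_context_len <= 8192 and (max_num_partitions == 1
                                          or num_seqs * num_heads > 512)
    return 1 if use_v1 else 2

print(paged_attention_version(4096, 4, 32))    # -> 2
print(paged_attention_version(4096, 64, 32))   # -> 1 (enough parallel work for V1)
print(paged_attention_version(16384, 4, 32))   # -> 2 (long context always uses V2)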
+ use_v1 = input_metadata.max_context_len <= 8192 and ( + max_num_partitions == 1 or num_seqs * num_heads > 512) + if use_v1: + # Run PagedAttention V1. + ops.paged_attention_v1( + output, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + input_metadata.block_tables, + input_metadata.context_lens, + block_size, + input_metadata.max_context_len, + alibi_slopes, + input_metadata.kv_cache_dtype, + ) + else: + # Run PagedAttention V2. + assert _PARTITION_SIZE % block_size == 0 + tmp_output = torch.empty( + size=(num_seqs, num_heads, max_num_partitions, head_size), + dtype=output.dtype, + device=output.device, + ) + exp_sums = torch.empty( + size=(num_seqs, num_heads, max_num_partitions), + dtype=torch.float32, + device=output.device, + ) + max_logits = torch.empty_like(exp_sums) + ops.paged_attention_v2( + output, + exp_sums, + max_logits, + tmp_output, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + input_metadata.block_tables, + input_metadata.context_lens, + block_size, + input_metadata.max_context_len, + alibi_slopes, + input_metadata.kv_cache_dtype, + ) + return output + + @staticmethod + def forward_prefix( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + input_metadata: InputMetadata, + alibi_slopes: Optional[torch.Tensor], + ) -> torch.Tensor: + output = torch.empty_like(query) + context_attention_fwd( + query, + key, + value, + output, + key_cache, + value_cache, + input_metadata.block_tables, # [BS, max_block_per_request] + input_metadata.start_loc, + input_metadata.prompt_lens, + input_metadata.context_lens, + input_metadata.max_seq_len, + alibi_slopes, + ) + return output diff --git a/vllm/model_executor/layers/triton_kernel/prefix_prefill.py b/vllm/model_executor/layers/attention/ops/prefix_prefill.py similarity index 100% rename from vllm/model_executor/layers/triton_kernel/prefix_prefill.py rename to vllm/model_executor/layers/attention/ops/prefix_prefill.py diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index 550dec6487f9e..6da0082b94285 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -27,7 +27,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -151,10 +151,10 @@ def __init__( alibi_slopes = alibi_slopes[head_start:head_end].tolist() scaling = self.head_dim**-0.5 - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scaling, - alibi_slopes=alibi_slopes) + self.attn = Attention(self.num_heads, + self.head_dim, + scaling, + alibi_slopes=alibi_slopes) else: self.rotary_emb = get_rope( self.head_dim, @@ -163,8 +163,7 @@ def __init__( base=self.rope_theta, ) self.scaling = self.head_dim**-0.5 - self.attn = PagedAttention(self.num_heads, self.head_dim, - self.scaling) + self.attn = Attention(self.num_heads, self.head_dim, self.scaling) def forward( self, diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 4adfb6b78102f..0548b2b140b1b 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -25,7 +25,7 @@ from vllm.model_executor.input_metadata import 
InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -107,10 +107,10 @@ def __init__( alibi_slopes = alibi_slopes[head_start:head_end].tolist() scaling = self.head_dim**-0.5 - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scaling, - alibi_slopes=alibi_slopes) + self.attn = Attention(self.num_heads, + self.head_dim, + scaling, + alibi_slopes=alibi_slopes) def forward( self, diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index dca8d724f976b..1c5dcfacaff2b 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -10,7 +10,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -87,7 +87,7 @@ def __init__( base=10000 * rope_ratio, is_neox_style=False, ) - self.attn = PagedAttention( + self.attn = Attention( self.num_heads, self.head_dim, self.scaling, diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 6dba952736921..f2dca3df27cfb 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -29,7 +29,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.fused_moe import fused_moe from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, @@ -229,10 +229,10 @@ def __init__( base=rope_theta, rope_scaling=rope_scaling, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 2b5e022312e3b..3c148be5b10f4 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -28,7 +28,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -150,10 +150,10 @@ def __init__( max_position=max_position_embeddings, base=rope_theta, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.inv_norm_factor, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.inv_norm_factor, + num_kv_heads=self.num_kv_heads) elif self.use_alibi: tp_rank = get_tensor_model_parallel_rank() head_start = tp_rank * self.num_heads @@ -161,16 +161,16 @@ def __init__( alibi_slopes = 
(_get_alibi_slopes(self.total_num_heads) * self.inv_norm_factor) alibi_slopes = alibi_slopes[head_start:head_end].tolist() - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.inv_norm_factor, - num_kv_heads=self.num_kv_heads, - alibi_slopes=alibi_slopes) + self.attn = Attention(self.num_heads, + self.head_dim, + self.inv_norm_factor, + num_kv_heads=self.num_kv_heads, + alibi_slopes=alibi_slopes) else: - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scale=self.inv_norm_factor, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + scale=self.inv_norm_factor, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index bf1f164ff700d..386a36cf492d6 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -23,7 +23,7 @@ from vllm.config import LoRAConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import GeluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -123,10 +123,10 @@ def __init__(self, base=self.rope_theta, is_neox_style=True, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index 661da0fe0434e..3f7b21e5a4133 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -25,7 +25,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -73,9 +73,7 @@ def __init__( bias=True, linear_method=linear_method, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scale=self.scale) + self.attn = Attention(self.num_heads, self.head_dim, scale=self.scale) def forward( self, diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index ef4c1d4143c88..5c30d47d93e36 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -26,7 +26,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -85,10 +85,10 @@ def __init__( bias=True, linear_method=linear_method, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scale=self.scale, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + scale=self.scale, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index 5bab30d9d442e..b8c6822e9825e 
100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -24,7 +24,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -86,7 +86,7 @@ def __init__( base=rope_theta, is_neox_style=False, ) - self.attn = PagedAttention(self.num_heads, self.head_size, scaling) + self.attn = Attention(self.num_heads, self.head_size, scaling) def forward( self, diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py index 8f7e1063e0c1d..98107350e60b9 100644 --- a/vllm/model_executor/models/gpt_neox.py +++ b/vllm/model_executor/models/gpt_neox.py @@ -24,7 +24,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -87,7 +87,7 @@ def __init__( max_position=max_position_embeddings, base=rope_theta, ) - self.attn = PagedAttention(self.num_heads, self.head_size, scaling) + self.attn = Attention(self.num_heads, self.head_size, scaling) def forward( self, diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index ebf1d8a89a022..0ae0a85643456 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -7,7 +7,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -114,10 +114,10 @@ def __init__( base=rope_theta, rope_scaling=rope_scaling, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index d35887cc0f6a3..4c163dfdab537 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -30,7 +30,7 @@ from vllm.config import LoRAConfig from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -139,11 +139,11 @@ def __init__( base=rope_theta, rope_scaling=rope_scaling, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads, - sliding_window=sliding_window) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads, + sliding_window=sliding_window) def forward( self, diff --git 
a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 0100624a44d78..d47834e519697 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -29,7 +29,7 @@ from vllm.config import LoRAConfig from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.fused_moe import fused_moe from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, @@ -197,7 +197,7 @@ def __init__(self, base=int(self.rope_theta), is_neox_style=True, ) - self.attn = PagedAttention( + self.attn = Attention( self.num_heads, self.head_dim, self.scaling, diff --git a/vllm/model_executor/models/mixtral_quant.py b/vllm/model_executor/models/mixtral_quant.py index a8dadce24aa1d..25c7f1978c0dc 100644 --- a/vllm/model_executor/models/mixtral_quant.py +++ b/vllm/model_executor/models/mixtral_quant.py @@ -32,7 +32,7 @@ from transformers import MixtralConfig from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, ReplicatedLinear, @@ -214,7 +214,7 @@ def __init__(self, base=int(self.rope_theta), is_neox_style=True, ) - self.attn = PagedAttention( + self.attn = Attention( self.num_heads, self.head_dim, self.scaling, diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index 22a876e2ef691..16ecac3d0529a 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -8,7 +8,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -105,11 +105,11 @@ def __init__( self.head_dim = self.d_model // self.total_num_heads scaling = self.head_dim**-0.5 - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scaling, - alibi_slopes=alibi_slopes, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + scaling, + alibi_slopes=alibi_slopes, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 9d563039208c8..fa7a6d850051e 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -43,7 +43,7 @@ from torch import nn from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import ( ColumnParallelLinear, LinearMethodBase, @@ -126,9 +126,9 @@ def __init__( base=rope_theta, ) self.scaling = self.head_dim**-0.5 - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scale=self.scaling) + self.attn = Attention(self.num_heads, + self.head_dim, + scale=self.scaling) # Attention output projection. 
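Across the model files in this patch the rewrite is mechanical: the keyword arguments PagedAttention used to take move unchanged onto Attention, which now owns backend selection. A condensed sketch of the resulting pattern in a decoder layer (the class and names here are illustrative, not from any one model):

from typing import Optional

import torch.nn as nn

from vllm.model_executor.layers.attention import Attention

class ToySelfAttention(nn.Module):

    def __init__(self, num_heads: int, num_kv_heads: int, head_dim: int,
                 sliding_window: Optional[int] = None) -> None:
        super().__init__()
        # Backend choice (FlashAttention vs. xFormers) now happens inside
        # Attention; the model only declares its shape/ALiBi/window config.
        self.attn = Attention(num_heads,
                              head_dim,
                              scale=head_dim**-0.5,
                              num_kv_heads=num_kv_heads,
                              sliding_window=sliding_window)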
self.attn_out = RowParallelLinear( diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 393b2dcabcd5a..782f43ce265bd 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -25,7 +25,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -89,9 +89,9 @@ def __init__( bias=bias, linear_method=linear_method, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - scale=self.scaling) + self.attn = Attention(self.num_heads, + self.head_dim, + scale=self.scaling) def forward( self, diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index 0b067d4fc8802..6039b1cdc3534 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -12,7 +12,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, QKVParallelLinear, @@ -118,10 +118,10 @@ def __init__( base=rope_theta, rope_scaling=rope_scaling, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads) def forward( self, diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index d143261968288..039dc7a9b7675 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -43,7 +43,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearMethodBase, QKVParallelLinear, @@ -108,7 +108,7 @@ def __init__(self, max_position=max_position_embeddings, base=rope_theta, ) - self.attn = PagedAttention(self.num_heads, self.head_size, scaling) + self.attn = Attention(self.num_heads, self.head_size, scaling) def forward( self, diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index 37af84c7cd53f..d4d5a4e8bb9a5 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -12,7 +12,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -104,7 +104,7 @@ def __init__( base=rope_theta, rope_scaling=rope_scaling, ) - self.attn = PagedAttention(self.num_heads, self.head_dim, self.scaling) + self.attn = Attention(self.num_heads, self.head_dim, self.scaling) def forward( self, diff --git a/vllm/model_executor/models/qwen2.py 
b/vllm/model_executor/models/qwen2.py index e823e6f8c3dbe..3586a7fb82778 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -30,7 +30,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, @@ -135,11 +135,11 @@ def __init__(self, max_position=max_position, base=self.rope_theta, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads, - sliding_window=self.sliding_window) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_kv_heads, + sliding_window=self.sliding_window) def forward( self, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index 44c57e5a6d4f9..d1a547f815616 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -25,7 +25,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.linear import (LinearMethodBase, MergedColumnParallelLinear, QKVParallelLinear, @@ -122,10 +122,10 @@ def __init__(self, max_position=self.config.max_position_embeddings, base=self.config.rope_theta, ) - self.attn = PagedAttention(self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_key_value_heads) + self.attn = Attention(self.num_heads, + self.head_dim, + self.scaling, + num_kv_heads=self.num_key_value_heads) def forward( self, diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index 1eda07b724cae..efa235233372f 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -25,7 +25,7 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.layers.attention import PagedAttention +from vllm.model_executor.layers.attention import Attention from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.linear import (ColumnParallelLinear, @@ -103,7 +103,7 @@ def __init__(self, base=int(self.rope_theta), is_neox_style=True, ) - self.attn = PagedAttention( + self.attn = Attention( self.num_heads, self.head_dim, self.scaling, From 385da2dae2b90e5273da8dfce881727bd9c574a1 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 7 Mar 2024 11:42:42 -0800 Subject: [PATCH 065/196] Measure model memory usage (#3120) --- vllm/utils.py | 25 +++++++++++++++++++++++++ vllm/worker/model_runner.py | 18 ++++++++++++------ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 9cdf623379516..5b94067cec777 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -3,6 +3,7 @@ import socket import subprocess import uuid +import gc from platform import uname from typing import List, Tuple, Union from packaging.version import parse, Version @@ -309,3 +310,27 @@ def create_kv_caches_with_random( f"Does 
not support value cache of type {cache_dtype}") value_caches.append(value_cache) return key_caches, value_caches + + +class measure_cuda_memory: + + def __init__(self, device=None): + self.device = device + + def current_memory_usage(self) -> float: + # Return the memory usage in bytes. + torch.cuda.reset_peak_memory_stats(self.device) + mem = torch.cuda.max_memory_allocated(self.device) + return mem + + def __enter__(self): + self.initial_memory = self.current_memory_usage() + # This allows us to call methods of the context manager if needed + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.final_memory = self.current_memory_usage() + self.consumed_memory = self.final_memory - self.initial_memory + + # Force garbage collection + gc.collect() diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index aff8ebc903623..b01f865f1bb03 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -21,7 +21,7 @@ from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager from vllm.lora.layers import LoRAMapping from vllm.lora.request import LoRARequest -from vllm.utils import in_wsl +from vllm.utils import in_wsl, measure_cuda_memory logger = init_logger(__name__) @@ -85,11 +85,17 @@ def __init__( self.model_config.enforce_eager = True def load_model(self) -> None: - self.model = get_model(self.model_config, - self.device_config, - lora_config=self.lora_config, - parallel_config=self.parallel_config, - scheduler_config=self.scheduler_config) + with measure_cuda_memory() as m: + self.model = get_model(self.model_config, + self.device_config, + lora_config=self.lora_config, + parallel_config=self.parallel_config, + scheduler_config=self.scheduler_config) + + self.model_memory_usage = m.consumed_memory + logger.info( + f"Loading model weights took {self.model_memory_usage / float(2**30):.4f} GB" + ) vocab_size = self.model.config.vocab_size From 8cbba4622c8c526b207b17e3ba51e18e2c766419 Mon Sep 17 00:00:00 2001 From: jacobthebanana <50071502+jacobthebanana@users.noreply.github.com> Date: Thu, 7 Mar 2024 18:03:22 -0500 Subject: [PATCH 066/196] Possible fix for conflict between Automated Prefix Caching (#2762) and multi-LoRA support (#1804) (#3263) --- tests/test_cache_block_hashing.py | 46 +++++++++++++++++++++---------- vllm/sequence.py | 3 +- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py index c2067e52b59c0..fb541f38f3489 100644 --- a/tests/test_cache_block_hashing.py +++ b/tests/test_cache_block_hashing.py @@ -2,8 +2,11 @@ Run `pytest tests/test_cache_block_hashing.py`. 
""" +from typing import List, Optional + import pytest +from vllm.lora.request import LoRARequest from vllm.transformers_utils.tokenizer import TokenizerGroup from vllm.sequence import Sequence @@ -36,7 +39,10 @@ def flatten_2d(li): @pytest.mark.parametrize("model", ["facebook/opt-125m"]) @pytest.mark.parametrize("block_size", [16]) @pytest.mark.parametrize("max_num_seqs", [256]) -def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int): +@pytest.mark.parametrize("concurrent_lora_int_ids", + [[None], [1], [None, 1], [None, 1, 2], [1, 2]]) +def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, + concurrent_lora_int_ids: List[Optional[int]]): tokenizer = TokenizerGroup( tokenizer_id="facebook/opt-125m", @@ -48,20 +54,30 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int): hashes = [] for prefix in prefixes: - hashes.append([]) - prompts = [prefix + prompt for prompt in sample_prompts] - seq_id = 0 - for prompt in prompts: - hashes[-1].append([]) - prompt_token_ids = tokenizer.encode(prompt) - seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, - tokenizer.tokenizer.eos_token_id) - - num_blocks = len(prompt_token_ids) // block_size - for idx in range(num_blocks): - hashes[-1][-1].append(seq.hash_of_block(idx)) - - seq_id += 1 + for lora_int_id in concurrent_lora_int_ids: + lora_request = None + + if lora_int_id is not None: + lora_request = LoRARequest( + f"example_lora_{lora_int_id}", + lora_int_id, + f"example/path/to/lora_{lora_int_id}", + ) + + hashes.append([]) + prompts = [prefix + prompt for prompt in sample_prompts] + seq_id = 0 + for prompt in prompts: + hashes[-1].append([]) + prompt_token_ids = tokenizer.encode(prompt) + seq = Sequence(seq_id, prompt, prompt_token_ids, block_size, + tokenizer.tokenizer.eos_token_id, lora_request) + + num_blocks = len(prompt_token_ids) // block_size + for idx in range(num_blocks): + hashes[-1][-1].append(seq.hash_of_block(idx)) + + seq_id += 1 # Check that hashes made with two prefixes with different first blocks are # different everywhere. diff --git a/vllm/sequence.py b/vllm/sequence.py index 19dafe3cb0fc9..fee96a875dde5 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -175,7 +175,8 @@ def hash_of_block(self, logical_idx: int) -> int: # TODO: The current hashing function is O(L^2). We should optimize # this in the future. 
num_tokens = self.num_hashed_tokens_of_block(logical_idx) - return hash(tuple(self.data.get_token_ids()[0:num_tokens])) + return hash( + (tuple(self.data.get_token_ids()[0:num_tokens]), self.lora_int_id)) def num_hashed_tokens_of_block(self, logical_idx: int): return logical_idx * self.block_size + self.block_size From b35cc93420e37b72dc1c4bbedb06012fd294b743 Mon Sep 17 00:00:00 2001 From: ElizaWszola Date: Fri, 8 Mar 2024 01:37:28 +0100 Subject: [PATCH 067/196] Fix auto prefix bug (#3239) --- tests/engine/test_computed_prefix_blocks.py | 34 +++++++++++++++++++++ vllm/core/block_manager.py | 28 +++++++++-------- vllm/worker/model_runner.py | 1 + 3 files changed, 51 insertions(+), 12 deletions(-) create mode 100644 tests/engine/test_computed_prefix_blocks.py diff --git a/tests/engine/test_computed_prefix_blocks.py b/tests/engine/test_computed_prefix_blocks.py new file mode 100644 index 0000000000000..ed35212cc3f11 --- /dev/null +++ b/tests/engine/test_computed_prefix_blocks.py @@ -0,0 +1,34 @@ +import pytest + +from vllm.engine.arg_utils import EngineArgs +from vllm.engine.llm_engine import LLMEngine +from vllm.sampling_params import SamplingParams + + +@pytest.mark.parametrize("model", ["facebook/opt-125m"]) +@pytest.mark.parametrize("block_size", [16]) +def test_computed_prefix_blocks(model: str, block_size: int): + # This test checks if we are able to run the engine to completion + # without triggering asserts. + # We are in a scenario where all blocks from the second request's prompt + # are full and already computed when the second request arrives. + prompt = ( + "You are a helpful assistant. How do I build a car from cardboard and " + "paper clips? Is there an easy to follow video tutorial available " + "online for free?") + prompt2 = ( + " Please recommend to me some resources where I can learn not only to " + "handle technical difficulties of building a car, but also " + "decoration.") + + engine_args = EngineArgs(model=model, + block_size=block_size, + enable_prefix_caching=True) + + engine = LLMEngine.from_engine_args(engine_args) + sampling_params = SamplingParams() + + engine.add_request("0", prompt + prompt2, sampling_params) + engine.step() + engine.add_request("1", prompt, sampling_params) + engine.step() diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index daf83827a7e52..52b120f227eda 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -1,6 +1,6 @@ """A block manager that manages token blocks.""" import enum -from itertools import count +from itertools import count, takewhile from os.path import commonprefix from typing import Dict, List, Optional, Set, Tuple @@ -426,23 +426,29 @@ def access_all_blocks_in_seq( for block in block_table: block.last_accessed = access_time - def compute_last_full_block_in_seq(self, seq: Sequence): + def compute_full_blocks_in_seq(self, seq: Sequence): if seq.seq_id not in self.block_tables: return max_full_block = seq.get_len() // self.block_size - 1 block_table = self.block_tables[seq.seq_id] if max_full_block == -1: return - block_table[max_full_block].computed = True + for i in reversed(range(max_full_block)): + if block_table[i].computed: + break + block_table[i].computed = True - def get_all_block_ids_till_computed(self, seq: Sequence) -> List[int]: + def get_all_computed_blocks(self, seq: Sequence) -> List[int]: if seq.seq_id not in self.block_tables: return [] block_table = self.block_tables[seq.seq_id] - for block_idx in reversed(range(len(block_table))): - if block_table[block_idx].computed: 
- return [b.block_number for b in block_table[:block_idx + 1]] - return [] + # NOTE We exclude the last block to avoid the case where the entire + # prompt is cached. This would cause erroneous behavior in model + # runner. + return [ + b.block_number + for b in takewhile(lambda b: b.computed, block_table[:-1]) + ] def get_common_computed_block_ids(self, seq_group: SequenceGroup) -> List[int]: @@ -451,14 +457,12 @@ def get_common_computed_block_ids(self, return [] ids_list = [ - self.get_all_block_ids_till_computed(seq) + self.get_all_computed_blocks(seq) for seq in iter(seq_group.seqs_dict.values()) ] return commonprefix([ids for ids in ids_list if ids != []]) def mark_blocks_as_computed(self, seq_group: SequenceGroup): - # NOTE: We only mark the last full block because with prefix caching, - # all blocks until the marked one are guaranteed to be computed. if self.enable_caching: for seq in seq_group.seqs_dict.values(): - self.compute_last_full_block_in_seq(seq) + self.compute_full_blocks_in_seq(seq) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index b01f865f1bb03..9023b0c59b3fb 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -215,6 +215,7 @@ def _prepare_prompt( slot_mapping[-1].append(slot) max_prompt_len = max(subquery_lens) + assert max_prompt_len > 0 input_tokens = _make_tensor_with_pad(input_tokens, max_prompt_len, pad=0, From d2339d6840498397f6e373489ed120cd2cce8eb4 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 7 Mar 2024 16:38:12 -0800 Subject: [PATCH 068/196] Connect engine healthcheck to openai server (#3260) --- vllm/entrypoints/openai/api_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 993a834e5a720..9f29b4ac92f48 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -160,6 +160,7 @@ async def validation_exception_handler(_, exc): @app.get("/health") async def health() -> Response: """Health check.""" + await openai_serving_chat.engine.check_health() return Response(status_code=200) From c59e120c557743b0fc8178ee1796c8a3def78bf4 Mon Sep 17 00:00:00 2001 From: whyiug Date: Fri, 8 Mar 2024 13:58:24 +0800 Subject: [PATCH 069/196] Feature add lora support for Qwen2 (#3177) --- csrc/punica/bgmv/bgmv_config.h | 2 ++ vllm/model_executor/models/qwen2.py | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/csrc/punica/bgmv/bgmv_config.h b/csrc/punica/bgmv/bgmv_config.h index d5fee9c40d00c..3eb84ceb4d534 100644 --- a/csrc/punica/bgmv/bgmv_config.h +++ b/csrc/punica/bgmv/bgmv_config.h @@ -21,6 +21,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 2048) \ f(in_T, out_T, W_T, narrow, 2560) \ f(in_T, out_T, W_T, narrow, 2752) \ + f(in_T, out_T, W_T, narrow, 2816) \ f(in_T, out_T, W_T, narrow, 3072) \ f(in_T, out_T, W_T, narrow, 3456) \ f(in_T, out_T, W_T, narrow, 3584) \ @@ -36,6 +37,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 10240) \ f(in_T, out_T, W_T, narrow, 11008) \ f(in_T, out_T, W_T, narrow, 12288) \ + f(in_T, out_T, W_T, narrow, 13696) \ f(in_T, out_T, W_T, narrow, 13824) \ f(in_T, out_T, W_T, narrow, 14336) \ f(in_T, out_T, W_T, narrow, 16384) \ diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 3586a7fb82778..4dd63f923e5f2 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py 
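Returning to the prefix-caching fix in #3239: get_all_computed_blocks takes the leading run of computed blocks and deliberately skips the table's last entry, so even a fully cached prompt leaves one block for the model runner to prefill (which is what the new assert max_prompt_len > 0 relies on). A toy illustration of the scan:

from itertools import takewhile

# One "computed" flag per physical block of a sequence (toy data).
computed = [True, True, True, False, True]

# Mirrors get_all_computed_blocks: leading computed blocks, last entry excluded.
cached_prefix = list(takewhile(lambda done: done, computed[:-1]))
print(len(cached_prefix))  # -> 3

# A fully computed table still keeps its final block out of the cached prefix.
print(len(list(takewhile(lambda done: done, ([True] * 4)[:-1]))))  # -> 3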
@@ -46,6 +46,7 @@ from vllm.model_executor.weight_utils import (default_weight_loader, hf_model_weights_iterator) from vllm.sequence import SamplerOutput +from vllm.config import LoRAConfig KVCache = Tuple[torch.Tensor, torch.Tensor] @@ -264,12 +265,35 @@ def forward( class Qwen2ForCausalLM(nn.Module): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + # LoRA specific attributes + supported_lora_modules = [ + "qkv_proj", + "o_proj", + "gate_up_proj", + "down_proj", + ] + embedding_modules = {} + embedding_padding_modules = [] def __init__( self, config: Qwen2Config, linear_method: Optional[LinearMethodBase] = None, + lora_config: Optional[LoRAConfig] = None, ) -> None: + del lora_config super().__init__() self.config = config self.linear_method = linear_method From 1ece1ae829dcbc4b1b19b3e2d3042457615e862f Mon Sep 17 00:00:00 2001 From: TianYu GUO Date: Fri, 8 Mar 2024 14:22:59 +0800 Subject: [PATCH 070/196] [Minor Fix] Fix comments in benchmark_serving (#3252) --- benchmarks/benchmark_serving.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 7d389a9c7d703..3f5e2d9c8f4dc 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -12,7 +12,7 @@ On the client side, run: python benchmarks/benchmark_serving.py \ --backend \ - --tokenizer --dataset \ + --model --dataset \ --request-rate """ import argparse @@ -171,10 +171,10 @@ async def benchmark( else: raise ValueError(f"Unknown backend: {backend}") - pbar = None if disable_tqdm else tqdm(total=len(input_requests)) - print(f"Traffic request rate: {request_rate}") + pbar = None if disable_tqdm else tqdm(total=len(input_requests)) + benchmark_start_time = time.perf_counter() tasks = [] async for request in get_request(input_requests, request_rate): From 99c3cfb83c20d45899ab8cbfdddce98c7cffb7b1 Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Fri, 8 Mar 2024 09:58:01 -0800 Subject: [PATCH 071/196] [Docs] Fix Unmocked Imports (#3275) --- docs/source/conf.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5a45c6f9d1e0a..61d24e1612128 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -72,8 +72,15 @@ # Mock out external dependencies here. autodoc_mock_imports = [ - "torch", "transformers", "psutil", "prometheus_client", "sentencepiece", - "vllm.cuda_utils", "vllm._C" + "torch", + "transformers", + "psutil", + "prometheus_client", + "sentencepiece", + "vllm.cuda_utils", + "vllm._C", + "numpy", + "tqdm", ] for mock_target in autodoc_mock_imports: From 1cb0cc2975d1c42c445c795f955b783e78919502 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 8 Mar 2024 10:52:20 -0800 Subject: [PATCH 072/196] [FIX] Make `flash_attn` optional (#3269) --- .gitignore | 3 -- setup.py | 48 ++----------------- vllm/__init__.py | 30 +++--------- .../layers/attention/attention.py | 37 +++++++++++--- .../layers/attention/backends/flash_attn.py | 1 - 5 files changed, 41 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index 0b14c98270c41..b5195629e5cf3 100644 --- a/.gitignore +++ b/.gitignore @@ -184,6 +184,3 @@ _build/ # Benchmark dataset *.json - -# Third-party Python packages. 
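Looking back at the Qwen2 LoRA support from #3177: the new BGMV shapes (2816 and 13696) cover projection sizes used by the Qwen1.5 family, and the module mappings on Qwen2ForCausalLM let adapters attach to the fused qkv and gate_up projections. A usage sketch, assuming the multi-LoRA entrypoints from #1804 and with placeholder model and adapter paths:

from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Placeholder checkpoint and adapter location; requires an adapter trained
# against a Qwen2-architecture model.
llm = LLM(model="Qwen/Qwen1.5-0.5B", enable_lora=True)
outputs = llm.generate(
    ["Give a one-sentence summary of paged attention."],
    SamplingParams(temperature=0.0, max_tokens=64),
    lora_request=LoRARequest("qwen2-example-adapter", 1, "/path/to/adapter"),
)
print(outputs[0].outputs[0].text)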
-vllm/thirdparty_files/ diff --git a/setup.py b/setup.py index 57d7a139e8237..745b5a9b2d02a 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,6 @@ import os import re import subprocess -import sys import warnings from pathlib import Path from typing import List, Set @@ -15,8 +14,6 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, ROCM_HOME ROOT_DIR = os.path.dirname(__file__) -# This is a temporary directory to store third-party packages. -THIRDPARTY_SUBDIR = "vllm/thirdparty_files" # If you are developing the C++ backend of vLLM, consider building vLLM with # `python setup.py develop` since it will give you incremental builds. @@ -327,46 +324,8 @@ def get_torch_arch_list() -> Set[str]: "nvcc": NVCC_FLAGS_PUNICA, }, )) - - # Download the FlashAttention package. - # Adapted from https://github.com/ray-project/ray/blob/f92928c9cfcbbf80c3a8534ca4911de1b44069c0/python/setup.py#L518-L530 - flash_attn_version = "2.5.6" - install_dir = os.path.join(ROOT_DIR, THIRDPARTY_SUBDIR) - subprocess.check_call( - [ - sys.executable, - "-m", - "pip", - "install", - "-q", - f"--target={install_dir}", - "einops", # Dependency of flash-attn. - f"flash-attn=={flash_attn_version}", - "--no-dependencies", # Required to avoid re-installing torch. - ], - env=dict(os.environ, CC="gcc"), - ) - - # Copy the FlashAttention package into the vLLM package after build. - class build_ext(BuildExtension): - - def run(self): - super().run() - target_dir = os.path.join(self.build_lib, THIRDPARTY_SUBDIR) - if not os.path.exists(target_dir): - os.makedirs(target_dir) - self.copy_tree(install_dir, target_dir) - - class BinaryDistribution(setuptools.Distribution): - - def has_ext_modules(self): - return True - -else: - build_ext = BuildExtension - BinaryDistribution = setuptools.Distribution - if _is_neuron(): - neuronxcc_version = get_neuronxcc_version() +elif _is_neuron(): + neuronxcc_version = get_neuronxcc_version() vllm_extension_sources = [ "csrc/cache_kernels.cu", @@ -509,7 +468,6 @@ def get_requirements() -> List[str]: python_requires=">=3.8", install_requires=get_requirements(), ext_modules=ext_modules, - cmdclass={"build_ext": build_ext} if not _is_neuron() else {}, - distclass=BinaryDistribution, + cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {}, package_data=package_data, ) diff --git a/vllm/__init__.py b/vllm/__init__.py index 59f1345b58d42..f1e30f5eb6e6e 100644 --- a/vllm/__init__.py +++ b/vllm/__init__.py @@ -1,28 +1,12 @@ """vLLM: a high-throughput and memory-efficient inference engine for LLMs""" - -# Adapted from https://github.com/ray-project/ray/blob/f92928c9cfcbbf80c3a8534ca4911de1b44069c0/python/ray/__init__.py#L11 -def _configure_system(): - import os - import sys - - # Importing flash-attn. - thirdparty_files = os.path.join(os.path.abspath(os.path.dirname(__file__)), - "thirdparty_files") - sys.path.insert(0, thirdparty_files) - - -_configure_system() -# Delete configuration function. 
-del _configure_system - -from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs # noqa: E402 -from vllm.engine.async_llm_engine import AsyncLLMEngine # noqa: E402 -from vllm.engine.llm_engine import LLMEngine # noqa: E402 -from vllm.engine.ray_utils import initialize_cluster # noqa: E402 -from vllm.entrypoints.llm import LLM # noqa: E402 -from vllm.outputs import CompletionOutput, RequestOutput # noqa: E402 -from vllm.sampling_params import SamplingParams # noqa: E402 +from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine +from vllm.engine.llm_engine import LLMEngine +from vllm.engine.ray_utils import initialize_cluster +from vllm.entrypoints.llm import LLM +from vllm.outputs import CompletionOutput, RequestOutput +from vllm.sampling_params import SamplingParams __version__ = "0.3.3" diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py index 830e82e10f7ad..724dd0511c5aa 100644 --- a/vllm/model_executor/layers/attention/attention.py +++ b/vllm/model_executor/layers/attention/attention.py @@ -1,12 +1,16 @@ """Attention layer.""" +from functools import lru_cache from typing import List, Optional import torch import torch.nn as nn +from vllm.logger import init_logger from vllm.model_executor.input_metadata import InputMetadata from vllm.utils import is_hip +logger = init_logger(__name__) + class Attention(nn.Module): """Attention layer. @@ -30,17 +34,12 @@ def __init__( sliding_window: Optional[int] = None, ) -> None: super().__init__() - if (not is_hip() and torch.cuda.get_device_capability()[0] >= 8 and - torch.get_default_dtype() in (torch.float16, torch.bfloat16)): - # Ampere or later NVIDIA GPUs. - # NOTE(woosuk): FlashAttention does not support FP32. + if _use_flash_attn(): from vllm.model_executor.layers.attention.backends.flash_attn import FlashAttentionBackend self.backend = FlashAttentionBackend(num_heads, head_size, scale, num_kv_heads, alibi_slopes, sliding_window) else: - # Turing and Volta NVIDIA GPUs or AMD GPUs. - # Or FP32 on any GPU. from vllm.model_executor.layers.attention.backends.xformers import XFormersBackend self.backend = XFormersBackend(num_heads, head_size, scale, num_kv_heads, alibi_slopes, @@ -57,3 +56,29 @@ def forward( ) -> torch.Tensor: return self.backend.forward(query, key, value, key_cache, value_cache, input_metadata) + + +@lru_cache(maxsize=1) +def _use_flash_attn() -> bool: + try: + import flash_attn # noqa: F401 + except ImportError: + logger.info("flash_attn is not found. Using xformers backend.") + return False + + if is_hip(): + # AMD GPUs. + return False + if torch.cuda.get_device_capability()[0] < 8: + # Volta and Turing NVIDIA GPUs. + logger.info("flash_attn is not supported on Turing or older GPUs. " + "Using xformers backend.") + return False + if torch.get_default_dtype() not in (torch.float16, torch.bfloat16): + logger.info( + "flash_attn only supports torch.float16 or torch.bfloat16. 
" + "Using xformers backend.") + return False + + logger.info("Using flash_attn backend.") + return True diff --git a/vllm/model_executor/layers/attention/backends/flash_attn.py b/vllm/model_executor/layers/attention/backends/flash_attn.py index 512f4e49c7eb2..4abe195f274a7 100644 --- a/vllm/model_executor/layers/attention/backends/flash_attn.py +++ b/vllm/model_executor/layers/attention/backends/flash_attn.py @@ -1,7 +1,6 @@ """Attention layer with Flash and PagedAttention.""" from typing import List, Optional -# NOTE(woosuk): This imports flash_attn under vllm/thirdparty_files/. from flash_attn import flash_attn_func import torch From c2c5e0909ad4457ad542117939c2629ebe2db609 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 8 Mar 2024 13:33:10 -0800 Subject: [PATCH 073/196] Move model filelocks from `/tmp/` to `~/.cache/vllm/locks/` dir (#3241) --- vllm/model_executor/weight_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 3570366887e78..24d78db8d2637 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -20,6 +20,9 @@ logger = init_logger(__name__) +_xdg_cache_home = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache')) +_vllm_filelocks_path = os.path.join(_xdg_cache_home, 'vllm/locks/') + class Disabledtqdm(tqdm): @@ -28,7 +31,8 @@ def __init__(self, *args, **kwargs): def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): - lock_dir = cache_dir if cache_dir is not None else "/tmp" + lock_dir = cache_dir if cache_dir is not None else _vllm_filelocks_path + os.makedirs(os.path.dirname(lock_dir), exist_ok=True) lock_file_name = model_name_or_path.replace("/", "-") + ".lock" lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name)) return lock From f48c6791b7bfc2579ad575d33ed83912f0bfb011 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Fri, 8 Mar 2024 17:16:14 -0800 Subject: [PATCH 074/196] [FIX] Fix prefix test error on main (#3286) --- vllm/model_executor/layers/attention/backends/flash_attn.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm/model_executor/layers/attention/backends/flash_attn.py b/vllm/model_executor/layers/attention/backends/flash_attn.py index 4abe195f274a7..58ccd461b993e 100644 --- a/vllm/model_executor/layers/attention/backends/flash_attn.py +++ b/vllm/model_executor/layers/attention/backends/flash_attn.py @@ -103,8 +103,6 @@ def forward( key_cache, value_cache, input_metadata, - self.num_heads, - self.num_kv_heads, self.alibi_slopes, ) else: From 8437bae6ef47a690d18c72f0da02c7e5abe83866 Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Fri, 8 Mar 2024 23:32:46 -0800 Subject: [PATCH 075/196] [Speculative decoding 3/9] Worker which speculates, scores, and applies rejection sampling (#3103) --- .buildkite/test-pipeline.yaml | 5 +- tests/{worker => }/spec_decode/__init__.py | 0 tests/spec_decode/test_batch_expansion.py | 95 +++ tests/spec_decode/test_metrics.py | 157 +++++ .../spec_decode/test_multi_step_worker.py | 162 ++++- tests/spec_decode/test_spec_decode_worker.py | 591 ++++++++++++++++++ tests/spec_decode/test_utils.py | 111 ++++ tests/{worker => }/spec_decode/utils.py | 115 +++- tests/test_sequence.py | 50 ++ .../layers/rejection_sampler.py | 10 +- vllm/model_executor/layers/sampler.py | 2 +- vllm/sequence.py | 55 +- vllm/spec_decode/batch_expansion.py | 351 +++++++++++ vllm/spec_decode/interfaces.py | 77 +++ vllm/spec_decode/metrics.py | 174 ++++++ 
vllm/spec_decode/multi_step_worker.py | 366 +++++++++++ vllm/spec_decode/spec_decode_worker.py | 372 +++++++++++ vllm/spec_decode/util.py | 99 +++ vllm/worker/model_runner.py | 11 +- vllm/worker/spec_decode/multi_step_worker.py | 178 ------ vllm/worker/worker.py | 20 +- 21 files changed, 2786 insertions(+), 215 deletions(-) rename tests/{worker => }/spec_decode/__init__.py (100%) create mode 100644 tests/spec_decode/test_batch_expansion.py create mode 100644 tests/spec_decode/test_metrics.py rename tests/{worker => }/spec_decode/test_multi_step_worker.py (61%) create mode 100644 tests/spec_decode/test_spec_decode_worker.py create mode 100644 tests/spec_decode/test_utils.py rename tests/{worker => }/spec_decode/utils.py (60%) create mode 100644 tests/test_sequence.py create mode 100644 vllm/spec_decode/batch_expansion.py create mode 100644 vllm/spec_decode/interfaces.py create mode 100644 vllm/spec_decode/metrics.py create mode 100644 vllm/spec_decode/multi_step_worker.py create mode 100644 vllm/spec_decode/spec_decode_worker.py create mode 100644 vllm/spec_decode/util.py delete mode 100644 vllm/worker/spec_decode/multi_step_worker.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 15f971b66e3bd..42a1eacb6de57 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -28,7 +28,7 @@ steps: num_gpus: 2 # only support 1 or 2 for now. - label: Engine Test - command: pytest -v -s engine + command: pytest -v -s engine test_sequence.py - label: Entrypoints Test command: pytest -v -s entrypoints @@ -52,6 +52,9 @@ steps: - label: Worker Test command: pytest -v -s worker +- label: Speculative decoding tests + command: pytest -v -s spec_decode + - label: LoRA Test command: pytest -v -s lora --forked diff --git a/tests/worker/spec_decode/__init__.py b/tests/spec_decode/__init__.py similarity index 100% rename from tests/worker/spec_decode/__init__.py rename to tests/spec_decode/__init__.py diff --git a/tests/spec_decode/test_batch_expansion.py b/tests/spec_decode/test_batch_expansion.py new file mode 100644 index 0000000000000..fddc3995452cc --- /dev/null +++ b/tests/spec_decode/test_batch_expansion.py @@ -0,0 +1,95 @@ +import torch +import pytest + +from vllm.spec_decode.batch_expansion import BatchExpansionTop1Scorer + +from .utils import mock_worker, create_seq_group_metadata_from_prompts + + +@pytest.mark.parametrize('num_target_seq_ids', [100]) +def test_create_target_seq_id_iterator(num_target_seq_ids: int): + """Verify all new sequence ids are greater than all input + seq ids. + """ + scorer = BatchExpansionTop1Scorer(mock_worker(), 'cuda:0', 32_000) + + all_seq_ids = [ + [1, 3, 5, 7], + list(range(100)) + [0], + [100], + ] + + for seq_ids in all_seq_ids: + max_seq_id = max(seq_ids) + iterator = scorer._create_target_seq_id_iterator(seq_ids) # pylint: disable=protected-access + for _ in range(num_target_seq_ids): + assert next(iterator) > max_seq_id + + +@pytest.mark.parametrize('k', [1, 2, 6]) +def test_get_token_ids_to_score(k: int): + """Verify correct tokens are selected for scoring. 
+ """ + proposal_token_ids = torch.tensor( + list(range(k)), + dtype=torch.int64, + device='cuda', + ) + + expected_output = [ + [], + ] + for i in range(proposal_token_ids.shape[0]): + expected_output.append(proposal_token_ids[:i + 1].tolist()) + + scorer = BatchExpansionTop1Scorer(mock_worker(), 'cuda:0', 32_000) + actual_output = scorer._get_token_ids_to_score(proposal_token_ids) # pylint: disable=protected-access + + actual_output = [ + x.tolist() if isinstance(x, torch.Tensor) else x for x in actual_output + ] + + assert actual_output == expected_output + + +@pytest.mark.parametrize('k', [1, 2, 6]) +def test_create_single_target_seq_group_metadata(k: int): + """Verify correct creation of a batch-expanded seq group metadata. + """ + + prompt_tokens = [1, 2, 3] + prev_output_tokens = [4, 5, 6] + + token_ids = list(range(k)) + + num_tokens_processed = len(prompt_tokens) + len(prev_output_tokens) - 1 + + final_seq_len = len(prompt_tokens) + len(prev_output_tokens) + len( + token_ids) + + block_size = 32 + input_seq_group_metadata = create_seq_group_metadata_from_prompts( + [prompt_tokens], 2048 // block_size, block_size, [final_seq_len], + [prev_output_tokens], [num_tokens_processed])[0] + + input_seq_id = list(input_seq_group_metadata.seq_data.keys())[0] + target_seq_id = 100 + + scorer = BatchExpansionTop1Scorer(mock_worker(), 'cuda:0', 32_000) + output = scorer._create_single_target_seq_group_metadata( # pylint: disable=protected-access + input_seq_group_metadata, + input_seq_id, + target_seq_id, + token_ids, + ) + + assert output.request_id == input_seq_group_metadata.request_id + assert len(output.seq_data) == 1 + assert output.seq_data[target_seq_id].get_prompt_token_ids( + ) == prompt_tokens + assert output.seq_data[target_seq_id].get_output_token_ids( + ) == prev_output_tokens + token_ids + + assert len(output.block_tables) == 1 + assert output.block_tables[ + target_seq_id] == input_seq_group_metadata.block_tables[input_seq_id] diff --git a/tests/spec_decode/test_metrics.py b/tests/spec_decode/test_metrics.py new file mode 100644 index 0000000000000..941ea37aa81e0 --- /dev/null +++ b/tests/spec_decode/test_metrics.py @@ -0,0 +1,157 @@ +import torch +import math +import pytest + +from unittest.mock import MagicMock + +from vllm.spec_decode.metrics import AsyncMetricsCollector + + +def test_initial_call_returns_none(): + """Expect first call to get metrics to return None. + """ + rej_sampler = MagicMock() + rej_sampler.num_accepted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_emitted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_draft_tokens = 0 + + collector = AsyncMetricsCollector(rej_sampler) + collector.init_gpu_tensors(rank=0) + maybe_metrics = collector.maybe_collect_rejsample_metrics(k=5) + assert maybe_metrics is None + + +def test_second_call_returns_metrics(): + """Expect second call to not return None. 
+ """ + rej_sampler = MagicMock() + rej_sampler.num_accepted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_emitted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_draft_tokens = 0 + + collect_interval_s = 5.0 + timer = MagicMock() + timer.side_effect = [ + 0.0, collect_interval_s + 0.1, collect_interval_s + 0.2 + ] + + collector = AsyncMetricsCollector(rejection_sampler=rej_sampler, + timer=timer, + collect_interval_s=collect_interval_s) + collector.init_gpu_tensors(rank=0) + _ = collector.maybe_collect_rejsample_metrics(k=5) + metrics = collector.maybe_collect_rejsample_metrics(k=5) + assert metrics is not None + + +@pytest.mark.parametrize("rank", [1, 2, 3, 4]) +def test_nonzero_rank_noop(rank): + """Verify nonzero ranks don't collect metrics. + """ + rej_sampler = MagicMock() + rej_sampler.num_accepted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_emitted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_draft_tokens = 0 + + collector = AsyncMetricsCollector(rej_sampler) + collector.init_gpu_tensors(rank=rank) + _ = collector.maybe_collect_rejsample_metrics(k=5) + metrics = collector.maybe_collect_rejsample_metrics(k=5) + assert metrics is None + + +def test_noop_until_time(): + """Verify metrics aren't collected until enough time passes. + """ + rej_sampler = MagicMock() + rej_sampler.num_accepted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_emitted_tokens = torch.tensor(0, + dtype=torch.long, + device='cuda') + rej_sampler.num_draft_tokens = 0 + + collect_interval_s = 5.0 + timer = MagicMock() + timer.side_effect = [ + 0.0, collect_interval_s - 0.1, collect_interval_s - 0.1, + collect_interval_s + 0.1, collect_interval_s + 0.1 + ] + + collector = AsyncMetricsCollector(rejection_sampler=rej_sampler, + timer=timer, + collect_interval_s=collect_interval_s) + collector.init_gpu_tensors(rank=0) + + _ = collector.maybe_collect_rejsample_metrics(k=5) + metrics = collector.maybe_collect_rejsample_metrics(k=5) + assert metrics is None + + _ = collector.maybe_collect_rejsample_metrics(k=5) + metrics = collector.maybe_collect_rejsample_metrics(k=5) + assert metrics is not None + + +@pytest.mark.parametrize("has_data", [True, False]) +def test_initial_metrics_has_correct_values(has_data: bool): + """Test correctness of metrics data. 
+ """ + if has_data: + num_accepted_tokens = 103 + num_emitted_tokens = 104 + num_draft_tokens = 105 + else: + num_accepted_tokens = 0 + num_emitted_tokens = 0 + num_draft_tokens = 0 + k = 5 + + num_possible_tokens = AsyncMetricsCollector.get_max_num_accepted_tokens( + num_draft_tokens, k) + + rej_sampler = MagicMock() + rej_sampler.num_accepted_tokens = torch.tensor(num_accepted_tokens, + dtype=torch.long, + device='cuda') + rej_sampler.num_emitted_tokens = torch.tensor(num_emitted_tokens, + dtype=torch.long, + device='cuda') + rej_sampler.num_draft_tokens = num_draft_tokens + + collect_interval_s = 5.0 + timer = MagicMock() + timer.side_effect = [ + 0.0, collect_interval_s + 0.1, collect_interval_s + 0.2 + ] + + collector = AsyncMetricsCollector(rejection_sampler=rej_sampler, + timer=timer, + collect_interval_s=collect_interval_s) + collector.init_gpu_tensors(rank=0) + _ = collector.maybe_collect_rejsample_metrics(k) + metrics = collector.maybe_collect_rejsample_metrics(k) + + assert metrics.num_spec_tokens == k + assert metrics.accepted_tokens == num_accepted_tokens + assert metrics.draft_tokens == num_draft_tokens + assert metrics.emitted_tokens == num_emitted_tokens + + if has_data: + assert metrics.draft_acceptance_rate == num_accepted_tokens / num_draft_tokens + assert metrics.system_efficiency == num_emitted_tokens / num_possible_tokens + else: + assert math.isnan(metrics.draft_acceptance_rate) + assert math.isnan(metrics.system_efficiency) diff --git a/tests/worker/spec_decode/test_multi_step_worker.py b/tests/spec_decode/test_multi_step_worker.py similarity index 61% rename from tests/worker/spec_decode/test_multi_step_worker.py rename to tests/spec_decode/test_multi_step_worker.py index ea54802903578..88bb7c293fe95 100644 --- a/tests/worker/spec_decode/test_multi_step_worker.py +++ b/tests/spec_decode/test_multi_step_worker.py @@ -3,14 +3,15 @@ import pytest from unittest.mock import MagicMock -from vllm.worker.spec_decode.multi_step_worker import MultiStepWorker +from vllm.spec_decode.multi_step_worker import MultiStepWorker, DraftModelTop1Proposer from vllm.worker.worker import Worker from vllm.model_executor.utils import set_random_seed +from vllm.sequence import SamplerOutput from .utils import (create_execute_model_data, create_worker, create_seq_group_metadata_from_prompts, zero_kv_cache, patch_execute_model_with_seeds, - assert_logprobs_dict_allclose) + assert_logprobs_dict_allclose, create_batch) @pytest.mark.parametrize('num_steps', list(range(1, 17))) @@ -259,3 +260,160 @@ def test_same_output_for_multi_step(): multi_step_output_logprobs, single_step_output_logprobs): assert_logprobs_dict_allclose(multi_step_logprobs, single_step_logprobs) + + +@torch.inference_mode() +def test_draft_proposals_full_speculation_len(): + """Verify DraftModelTop1Proposer correctly handles case where all sequences + can speculate. 
+ """ + k = 10 + batch_size = 32 + vocab_size = 32_000 + device = 'cuda:0' + + draft_worker = MagicMock() + proposer = DraftModelTop1Proposer( + draft_worker=draft_worker, + device=device, + max_model_len=2048, + vocab_size=vocab_size, + ) + draft_worker.execute_model_multi_step.return_value = [ + SamplerOutput( + outputs=[], + sampled_token_probs=torch.rand(batch_size, + vocab_size, + device=device, + dtype=torch.float32), + sampled_token_ids=torch.randint(low=0, + high=vocab_size, + size=(batch_size, ), + device=device, + dtype=torch.long), + ) for _ in range(k) + ] + + execute_model_data, _, _ = create_batch(batch_size, k) + + proposals = proposer.get_proposals( + **execute_model_data.to_dict(), + max_proposal_len=k, + ) + + assert torch.is_tensor(proposals.proposal_token_ids) + assert torch.is_tensor(proposals.proposal_probs) + + assert proposals.proposal_token_ids.shape == torch.Size([batch_size, k]) + assert proposals.proposal_probs.shape[:-1] == torch.Size([batch_size, k]) + + assert proposals.proposal_lens.shape == torch.Size([batch_size]) + assert proposals.proposal_lens.tolist() == [k for _ in range(batch_size)] + + +@torch.inference_mode() +def test_draft_proposals_no_speculations(): + """Verify DraftModelTop1Proposer correctly handles case where no sequences + can speculate. + """ + k = 10 + batch_size = 32 + vocab_size = 32_000 + device = 'cuda:0' + prompt_len = 10 + + draft_worker = MagicMock() + proposer = DraftModelTop1Proposer( + draft_worker=draft_worker, + device=device, + max_model_len=prompt_len + k - 1, + vocab_size=vocab_size, + ) + + execute_model_data, _, _ = create_batch(batch_size, + k, + prompt_len=prompt_len) + + proposals = proposer.get_proposals( + **execute_model_data.to_dict(), + max_proposal_len=k, + ) + + assert torch.is_tensor(proposals.proposal_token_ids) + assert torch.is_tensor(proposals.proposal_probs) + + assert proposals.proposal_token_ids.shape == torch.Size([0, k]) + assert proposals.proposal_probs.shape[:-1] == torch.Size([0, k]) + + assert proposals.proposal_lens.shape == torch.Size([batch_size]) + assert proposals.proposal_lens.tolist() == [0 for _ in range(batch_size)] + + +@torch.inference_mode() +def test_draft_proposals_mixed_k(): + """Verify DraftModelTop1Proposer correctly handles case some sequences can + speculate and some can't. 
+ """ + k = 10 + batch_size = 32 + vocab_size = 32_000 + device = 'cuda:0' + + small_prompt_len = 5 + long_prompt_len = 10 + prev_output_token_len = 20 + + expected_num_proposal_seqs = 6 + expected_num_no_proposal_seqs = batch_size - expected_num_proposal_seqs + + prompt_len = [ + small_prompt_len for _ in range(expected_num_proposal_seqs - 1) + ] + [long_prompt_len + for _ in range(expected_num_no_proposal_seqs)] + [small_prompt_len] + + draft_worker = MagicMock() + proposer = DraftModelTop1Proposer( + draft_worker=draft_worker, + device=device, + max_model_len=long_prompt_len + prev_output_token_len + k - 1, + vocab_size=vocab_size, + ) + + draft_worker.execute_model_multi_step.return_value = [ + SamplerOutput( + outputs=[], + sampled_token_probs=torch.rand(expected_num_proposal_seqs, + vocab_size, + device=device, + dtype=torch.float32), + sampled_token_ids=torch.randint( + low=0, + high=vocab_size, + size=(expected_num_proposal_seqs, ), + device=device, + dtype=torch.long), + ) for _ in range(k) + ] + + execute_model_data, _, _ = create_batch( + batch_size, + k, + prompt_len=prompt_len, + prev_output_token_len=prev_output_token_len, + ) + + proposals = proposer.get_proposals( + **execute_model_data.to_dict(), + max_proposal_len=k, + ) + + assert torch.is_tensor(proposals.proposal_token_ids) + assert torch.is_tensor(proposals.proposal_probs) + + assert proposals.proposal_token_ids.shape == torch.Size([batch_size, k]) + assert proposals.proposal_probs.shape[:-1] == torch.Size([batch_size, k]) + + assert proposals.proposal_lens.shape == torch.Size([batch_size]) + assert proposals.proposal_lens.tolist() == [ + k for _ in range(expected_num_proposal_seqs - 1) + ] + [0 for _ in range(expected_num_no_proposal_seqs)] + [k] diff --git a/tests/spec_decode/test_spec_decode_worker.py b/tests/spec_decode/test_spec_decode_worker.py new file mode 100644 index 0000000000000..e919711c3ed2c --- /dev/null +++ b/tests/spec_decode/test_spec_decode_worker.py @@ -0,0 +1,591 @@ +import torch +import random +import pytest +from unittest.mock import MagicMock + +from vllm.spec_decode.multi_step_worker import MultiStepWorker +from vllm.spec_decode.spec_decode_worker import SpecDecodeWorker, split_num_cache_blocks_evenly +from vllm.spec_decode.interfaces import SpeculativeProposals +from vllm.model_executor.utils import set_random_seed +from vllm.model_executor.layers.rejection_sampler import RejectionSampler +from .utils import mock_worker, create_batch, ExecuteModelData, create_sampler_output_list +from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics, AsyncMetricsCollector + + +@pytest.mark.parametrize('k', [1, 2, 6]) +@pytest.mark.parametrize('batch_size', [1, 2, 32]) +@torch.inference_mode() +def test_correctly_calls_draft_model(k: int, batch_size: int): + """Verify SpecDecodeWorker calls the draft worker with correct + inputs. Everything else is mocked out. 
+ """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + exception_secret = 'artifical stop' + draft_worker.get_spec_proposals.side_effect = ValueError(exception_secret) + + execute_model_data, _, _ = create_batch(batch_size, k) + + with pytest.raises(ValueError, match=exception_secret): + worker.execute_model(**execute_model_data.to_dict(), num_spec_tokens=k) + + call_args_list = draft_worker.get_spec_proposals.call_args_list + assert len(call_args_list) == 1 + + for args, _ in call_args_list: + (seq_group_metadata_list, blocks_to_swap_in, blocks_to_swap_out, + blocks_to_copy, actual_k) = args + actual_execute_model_data = ExecuteModelData(seq_group_metadata_list, + blocks_to_swap_in, + blocks_to_swap_out, + blocks_to_copy) + assert actual_execute_model_data == execute_model_data + assert actual_k == k + + +@pytest.mark.parametrize('k', [1, 2, 6]) +@pytest.mark.parametrize('batch_size', [1, 2, 32]) +@torch.inference_mode() +def test_correctly_calls_target_model(k: int, batch_size: int): + """Verify SpecDecodeWorker calls the target model with correct + inputs. Everything else is mocked out. + """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + worker.init_model() + + vocab_size = 32_000 + + proposal_token_ids = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k), + dtype=torch.int64, + device='cuda') + proposal_probs = torch.rand(batch_size, + k, + vocab_size, + dtype=torch.float32, + device='cuda') + proposal_lens = torch.ones(batch_size, dtype=torch.int64, + device='cuda') * k + + execute_model_data, prompts, prev_output_tokens = create_batch( + batch_size, k) + + draft_worker.get_spec_proposals.return_value = SpeculativeProposals( + proposal_token_ids=proposal_token_ids, + proposal_probs=proposal_probs, + proposal_lens=proposal_lens) + + exception_secret = 'artifical stop' + target_worker.execute_model.side_effect = ValueError(exception_secret) + + with pytest.raises(ValueError, match=exception_secret): + worker.execute_model(**execute_model_data.to_dict(), num_spec_tokens=k) + + seen_contexts = [] + + call_args_list = target_worker.execute_model.call_args_list + assert len(call_args_list) == 1 + for args, kwargs in call_args_list: + target_execute_model_data = ExecuteModelData.from_dict(kwargs) + + assert len(target_execute_model_data.seq_group_metadata_list) == ( + k + 1) * batch_size + for seq_group_metadata in ( + target_execute_model_data.seq_group_metadata_list): + for seq_data in seq_group_metadata.seq_data.values(): + seen_contexts.append(seq_data.get_token_ids()) + + expected_seen_contexts = [] + + for prompt, prev_generated, draft_tokens in zip( + prompts, prev_output_tokens, proposal_token_ids.tolist()): + + for i in range(len(draft_tokens) + 1): + expected_seen_contexts.append(prompt + prev_generated + + draft_tokens[:i]) + + seen_contexts.sort() + expected_seen_contexts.sort() + assert expected_seen_contexts == seen_contexts + + 
+@pytest.mark.parametrize('k', [1, 2, 6]) +@pytest.mark.parametrize('batch_size', [1, 2, 32]) +@torch.inference_mode() +def test_correctly_calls_rejection_sampler(k: int, batch_size: int): + """Verify SpecDecodeWorker calls the rejection sampler with + correct inputs. Everything else is mocked out. + """ + vocab_size = 32_000 + + draft_worker = mock_worker(cls=MultiStepWorker, vocab_size=vocab_size) + target_worker = mock_worker(vocab_size=vocab_size) + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + worker.init_model() + + proposal_token_ids = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k), + dtype=torch.int64, + device='cuda') + proposal_probs = torch.rand(batch_size, + k, + vocab_size, + dtype=torch.float32, + device='cuda') + + proposal_lens = torch.ones(batch_size, dtype=torch.int64, + device='cuda') * k + + execute_model_data, _, _ = create_batch(batch_size, k) + + draft_worker.get_spec_proposals.return_value = SpeculativeProposals( + proposal_token_ids=proposal_token_ids, + proposal_probs=proposal_probs, + proposal_lens=proposal_lens) + + target_token_ids = torch.randint(low=0, + high=vocab_size, + size=(1, batch_size * (k + 1)), + dtype=torch.int64, + device='cuda') + target_token_probs = torch.rand(1, + batch_size * (k + 1), + vocab_size, + dtype=torch.float32, + device='cuda') + target_output = create_sampler_output_list(target_token_ids, + target_token_probs) + + target_worker.execute_model.return_value = target_output[0] + + exception_secret = 'artifical stop' + rejection_sampler.side_effect = ValueError(exception_secret) + + with pytest.raises(ValueError, match=exception_secret): + worker.execute_model(**execute_model_data.to_dict(), num_spec_tokens=k) + + assert len(rejection_sampler.call_args_list) == 1 + args, _ = rejection_sampler.call_args_list[0] + (actual_proposal_scores, actual_bonus_token_ids, actual_proposal_probs, + actual_proposal_token_ids) = args + + assert torch.equal(actual_bonus_token_ids, + target_token_ids.reshape(batch_size, k + 1)[:, -1:]) + assert torch.equal( + actual_proposal_scores, + target_token_probs.reshape(batch_size, k + 1, -1)[:, :-1]) + assert torch.equal(actual_proposal_token_ids, proposal_token_ids) + assert torch.equal(actual_proposal_probs, proposal_probs) + + +@pytest.mark.parametrize('k', [1, 2, 6]) +@pytest.mark.parametrize('batch_size', [1, 2, 32]) +@torch.inference_mode() +def test_correctly_formats_output(k: int, batch_size: int): + """Verify SpecDecodeWorker formats sampler output correctly. + Everything else is mocked out. 
+ """ + vocab_size = 32_000 + + draft_worker = mock_worker(cls=MultiStepWorker, vocab_size=vocab_size) + target_worker = mock_worker(vocab_size=vocab_size) + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + worker.init_model() + + proposal_token_ids = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k), + dtype=torch.int64, + device='cuda') + proposal_probs = torch.rand(batch_size, + k, + vocab_size, + dtype=torch.float32, + device='cuda') + + proposal_lens = torch.ones(batch_size, dtype=torch.int64, + device='cuda') * k + + execute_model_data, _, _ = create_batch(batch_size, k) + + draft_worker.get_spec_proposals.return_value = SpeculativeProposals( + proposal_token_ids=proposal_token_ids, + proposal_probs=proposal_probs, + proposal_lens=proposal_lens) + + target_token_ids = torch.randint(low=0, + high=vocab_size, + size=(1, batch_size * (k + 1)), + dtype=torch.int64, + device='cuda') + target_token_probs = torch.rand(1, + batch_size * (k + 1), + vocab_size, + dtype=torch.float32, + device='cuda') + target_output = create_sampler_output_list(target_token_ids, + target_token_probs) + + target_worker.execute_model.return_value = target_output[0] + + rejection_sampler_output = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k + 1), + dtype=torch.int64, + device='cuda') + for i in range(batch_size): + minimum_accepted_tokens = 1 + rejection_sampler_output[i][ + -random.randint(minimum_accepted_tokens, k + 1):] = -1 + + rejection_sampler.return_value = rejection_sampler_output + + output = worker.execute_model(**execute_model_data.to_dict(), + num_spec_tokens=k) + + expected_output = create_sampler_output_list( + rejection_sampler_output.transpose(0, 1), [None for _ in range(k + 1)]) + + seq_ids = [ + next(iter(seq_group_metadata.seq_data.keys())) + for seq_group_metadata in execute_model_data.seq_group_metadata_list + ] + actual_output_by_seq = {seq_id: [] for seq_id in seq_ids} + expected_output_by_seq = {seq_id: [] for seq_id in seq_ids} + + for step in output: + for seq_group in step: + for sample in seq_group.samples: + seq_id = sample.parent_seq_id + actual_output_by_seq[seq_id].append(sample) + + for step in expected_output: + for seq_group in step: + for sample in seq_group.samples: + seq_id = sample.parent_seq_id + expected_output_by_seq[seq_id].append(sample) + + all_seen_seq_ids = set( + list(actual_output_by_seq.keys()) + + list(expected_output_by_seq.keys())) + for seq_id in all_seen_seq_ids: + actual_by_step = actual_output_by_seq[seq_id] + expected_by_step = expected_output_by_seq[seq_id] + + for i in range(k + 1): + if i >= len(actual_by_step): + assert expected_by_step[i].output_token == -1 + continue + assert actual_by_step[i].output_token == expected_by_step[ + i].output_token + assert actual_by_step[i].logprobs == expected_by_step[i].logprobs + + +@pytest.mark.parametrize('k', [1, 2]) +@pytest.mark.parametrize('batch_size', [1]) +@pytest.mark.parametrize('returns_metrics', [True, False]) +@torch.inference_mode() +def test_collects_metrics(k: int, batch_size: int, returns_metrics: bool): + """Verify SpecDecodeWorker collects metrics. 
+ """ + vocab_size = 32_000 + + draft_worker = mock_worker(cls=MultiStepWorker, vocab_size=vocab_size) + target_worker = mock_worker(vocab_size=vocab_size) + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + worker.init_model() + + proposal_token_ids = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k), + dtype=torch.int64, + device='cuda') + proposal_probs = torch.rand(batch_size, + k, + vocab_size, + dtype=torch.float32, + device='cuda') + + proposal_lens = torch.ones(batch_size, dtype=torch.int64, + device='cuda') * k + + execute_model_data, _, _ = create_batch(batch_size, k) + + draft_worker.get_spec_proposals.return_value = SpeculativeProposals( + proposal_token_ids=proposal_token_ids, + proposal_probs=proposal_probs, + proposal_lens=proposal_lens) + + target_token_ids = torch.randint(low=0, + high=vocab_size, + size=(1, batch_size * (k + 1)), + dtype=torch.int64, + device='cuda') + target_token_probs = torch.rand(1, + batch_size * (k + 1), + vocab_size, + dtype=torch.float32, + device='cuda') + target_output = create_sampler_output_list(target_token_ids, + target_token_probs) + + target_worker.execute_model.return_value = target_output[0] + + rejection_sampler_output = torch.randint(low=0, + high=vocab_size, + size=(batch_size, k + 1), + dtype=torch.int64, + device='cuda') + for i in range(batch_size): + minimum_accepted_tokens = 1 + rejection_sampler_output[i][ + -random.randint(minimum_accepted_tokens, k + 1):] = -1 + + rejection_sampler.return_value = rejection_sampler_output + + mock_rejsample_metrics = MagicMock( + spec=SpecDecodeWorkerMetrics) if returns_metrics else None + metrics_collector.maybe_collect_rejsample_metrics.return_value = mock_rejsample_metrics + + output = worker.execute_model(**execute_model_data.to_dict(), + num_spec_tokens=k) + assert output[0].spec_decode_worker_metrics == mock_rejsample_metrics + + call_args_list = metrics_collector.maybe_collect_rejsample_metrics.call_args_list + assert len(call_args_list) == 1 + args, kwargs = call_args_list[0] + assert args[0] == k or kwargs.get('k', -1) == k + + +@pytest.mark.parametrize('k', [0]) +@pytest.mark.parametrize('batch_size', [1, 2, 32]) +@torch.inference_mode() +def test_k_equals_zero(k: int, batch_size: int): + """Verify that the SpecDecodeWorker calls the draft and target workers + when k is zero. This happens during prefill. 
+ """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + execute_model_data, prompts, prev_output_tokens = create_batch( + batch_size, k, prev_output_token_len=0) + + out = worker.execute_model(**execute_model_data.to_dict(), + num_spec_tokens=k) + + assert len(out) == 1, f"expected only one token output when {k=}" + assert out[0].probs is None, "expect gpu tensor references to be None" + assert out[ + 0].sampled_tokens is None, "expect gpu tensor references to be None" + + draft_worker.execute_model.assert_called_once_with( + **execute_model_data.to_dict(), return_python_output=False) + target_worker.execute_model.assert_called_once_with( + **execute_model_data.to_dict()) + + +@pytest.mark.parametrize('k', [0, 5]) +@pytest.mark.parametrize('batch_size', [0]) +@torch.inference_mode() +def test_empty_input_batch(k: int, batch_size: int): + """Verify that the SpecDecodeWorker calls the draft and target workers + when the input batch is empty. This can happen if the engine communicates + to the workers information without scheduling a batch. + """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + draft_worker.device = 'cuda' + target_worker.device = 'cuda' + + set_random_seed(1) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + execute_model_data, prompts, prev_output_tokens = create_batch( + batch_size, k, prev_output_token_len=0) + + out = worker.execute_model(**execute_model_data.to_dict(), + num_spec_tokens=k) + + assert len(out) == 1, f"expected only one token output when {k=}" + assert out[0].probs is None, "expect gpu tensor references to be None" + assert out[ + 0].sampled_tokens is None, "expect gpu tensor references to be None" + + draft_worker.execute_model.assert_called_once_with( + **execute_model_data.to_dict(), return_python_output=False) + target_worker.execute_model.assert_called_once_with( + **execute_model_data.to_dict()) + + +@torch.inference_mode() +def test_init_model(): + """Verify SpecDecodeWorker invokes proposer/scorer worker init_model, as + well as other GPU initialization. + """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + worker.init_model() + + draft_worker.init_model.assert_called_once() + + target_worker.init_model.assert_called_once() + + metrics_collector.init_gpu_tensors.assert_called_once() + rejection_sampler.init_gpu_tensors.assert_called_once() + + +@torch.inference_mode() +def test_init_cache_engine(): + """Verify SpecDecodeWorker invokes init_cache_engine on proposer/scorer + workers. 
+ """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + cache_config = MagicMock() + + worker.init_cache_engine(cache_config) + + draft_worker.init_cache_engine.assert_called_once_with(cache_config) + target_worker.init_cache_engine.assert_called_once_with(cache_config) + + +@pytest.mark.parametrize('available_gpu_blocks', [1, 1024]) +@pytest.mark.parametrize('available_cpu_blocks', [500]) +@pytest.mark.parametrize('target_cache_block_size_bytes', [2 * 2 * 4096]) +@pytest.mark.parametrize('draft_kv_size_bytes', [0, 2 * 2 * 768, 2 * 2 * 4096]) +@torch.inference_mode() +def test_profile_num_available_blocks(available_gpu_blocks: int, + available_cpu_blocks: int, + target_cache_block_size_bytes: int, + draft_kv_size_bytes: int): + """Verify SpecDecodeWorker correctly profiles num available GPU blocks. + Specifically, it should run profiling in the scorer worker, and then evenly + split the blocks between proposer and scorer worker. + """ + draft_worker = mock_worker(cls=MultiStepWorker) + target_worker = mock_worker() + rejection_sampler = MagicMock(spec=RejectionSampler) + rejection_sampler.token_id_dtype = torch.int64 + metrics_collector = MagicMock(spec=AsyncMetricsCollector) + + target_worker.profile_num_available_blocks.return_value = ( + available_gpu_blocks, available_cpu_blocks) + target_worker.get_cache_block_size_bytes.return_value = target_cache_block_size_bytes + draft_worker.get_cache_block_size_bytes.return_value = draft_kv_size_bytes + + worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, + metrics_collector) + + # These values do not directly impact the adjusted block size calculation, + # so they can be fixed. + gpu_memory_utilization = 0.9 + cpu_swap_space = 100 + block_size = 16 + + num_gpu_blocks, num_cpu_blocks = worker.profile_num_available_blocks( + block_size, gpu_memory_utilization, cpu_swap_space, cache_dtype="auto") + + target_worker.profile_num_available_blocks.assert_called_once_with( + block_size, gpu_memory_utilization, cpu_swap_space, "auto") + assert num_cpu_blocks == available_cpu_blocks + + assert num_gpu_blocks == split_num_cache_blocks_evenly( + target_cache_block_size_bytes, draft_kv_size_bytes, + available_gpu_blocks) + + +@pytest.mark.parametrize('available_gpu_blocks', + list(range(20)) + [1024, 1024**2]) +@pytest.mark.parametrize('target_cache_block_size_bytes', + [2 * 2 * 4096, 2 * 2 * 8192]) +@pytest.mark.parametrize('draft_kv_size_bytes', [0, 2 * 2 * 768, 2 * 2 * 4096]) +@torch.inference_mode() +def test_split_num_cache_blocks_evenly(available_gpu_blocks: int, + target_cache_block_size_bytes: int, + draft_kv_size_bytes: int): + """Verify split_num_cache_blocks_evenly does not exceed original memory + allocation in bytes. 
+ """ + num_blocks = split_num_cache_blocks_evenly(target_cache_block_size_bytes, + draft_kv_size_bytes, + available_gpu_blocks) + assert (num_blocks * target_cache_block_size_bytes) + ( + num_blocks * draft_kv_size_bytes) <= (available_gpu_blocks * + target_cache_block_size_bytes) diff --git a/tests/spec_decode/test_utils.py b/tests/spec_decode/test_utils.py new file mode 100644 index 0000000000000..19833ddb06154 --- /dev/null +++ b/tests/spec_decode/test_utils.py @@ -0,0 +1,111 @@ +from vllm.spec_decode.util import get_all_seq_ids +from vllm.sequence import SequenceGroupMetadata +from vllm.spec_decode.util import split_batch_by_proposal_len + +import pytest +from unittest.mock import MagicMock + + +def test_get_all_seq_ids(): + """Verify get_all_seq_ids extracts all seq ids. + """ + expected_seq_ids = list(range(10)) + list(range(100, 110)) + + seq_group_metadata_list = [ + SequenceGroupMetadata( + request_id=str(seq_id), + is_prompt=True, + seq_data={ + seq_id: MagicMock(), + }, + sampling_params=MagicMock(), + block_tables={ + seq_id: MagicMock(), + }, + lora_request=None, + ) for seq_id in expected_seq_ids + ] + + actual_seq_ids = get_all_seq_ids(seq_group_metadata_list) + assert actual_seq_ids == expected_seq_ids + + +@pytest.fixture +def fake_sequence_group_metadata(): + seq_ids = list(range(3)) + return [ + SequenceGroupMetadata( + request_id=str(i), + is_prompt=True, + seq_data={ + i: MagicMock(), + }, + sampling_params=MagicMock(), + block_tables={ + i: MagicMock(), + }, + lora_request=None, + ) for i in seq_ids + ] + + +def test_filter_zero_length_proposals(fake_sequence_group_metadata): + proposal_lens = [0, 1, 0] + filtered_groups, indices = split_batch_by_proposal_len( + fake_sequence_group_metadata, + proposal_lens, + select_proposal_len_zero=True) + + expected_groups = [ + fake_sequence_group_metadata[0], fake_sequence_group_metadata[2] + ] + expected_indices = [0, 2] + + assert filtered_groups == expected_groups + assert indices == expected_indices + + +def test_filter_non_zero_length_proposals(fake_sequence_group_metadata): + proposal_lens = [0, 1, 2] + filtered_groups, indices = split_batch_by_proposal_len( + fake_sequence_group_metadata, + proposal_lens, + select_proposal_len_zero=False) + + expected_groups = [ + fake_sequence_group_metadata[1], fake_sequence_group_metadata[2] + ] + expected_indices = [1, 2] + + assert filtered_groups == expected_groups + assert indices == expected_indices + + +def test_empty_inputs(): + filtered_groups, indices = split_batch_by_proposal_len( + [], [], select_proposal_len_zero=True) + + assert filtered_groups == [] + assert indices == [] + + +def test_all_zero_with_non_zero_filter(fake_sequence_group_metadata): + proposal_lens = [0, 0, 0] + filtered_groups, indices = split_batch_by_proposal_len( + fake_sequence_group_metadata, + proposal_lens, + select_proposal_len_zero=False) + + assert filtered_groups == [] + assert indices == [] + + +def test_all_non_zero_with_zero_filter(fake_sequence_group_metadata): + proposal_lens = [1, 1, 1] + filtered_groups, indices = split_batch_by_proposal_len( + fake_sequence_group_metadata, + proposal_lens, + select_proposal_len_zero=True) + + assert filtered_groups == [] + assert indices == [] diff --git a/tests/worker/spec_decode/utils.py b/tests/spec_decode/utils.py similarity index 60% rename from tests/worker/spec_decode/utils.py rename to tests/spec_decode/utils.py index fa8767cf898aa..997093988c0eb 100644 --- a/tests/worker/spec_decode/utils.py +++ b/tests/spec_decode/utils.py @@ -1,13 +1,16 @@ 
import torch -from typing import List, Optional, Dict +from typing import List, Optional, Dict, Iterable, Union +from unittest.mock import MagicMock from vllm.worker.worker import Worker from vllm.utils import get_distributed_init_method, get_ip, get_open_port from vllm.engine.arg_utils import EngineArgs -from vllm.sequence import Logprob, SequenceGroupMetadata, SequenceData +from vllm.sequence import (Logprob, SequenceGroupMetadata, SequenceData, + SamplerOutput, SequenceGroupOutput, SequenceOutput) from vllm.sampling_params import SamplingParams from vllm.worker.cache_engine import CacheEngine from vllm.model_executor.utils import set_random_seed +from itertools import count from dataclasses import dataclass, fields @@ -24,6 +27,11 @@ def to_dict(self): return dict( (field.name, getattr(self, field.name)) for field in fields(self)) + @classmethod + def from_dict(cls, d): + cleaned = dict((field.name, d[field.name]) for field in fields(cls)) + return cls(**cleaned) + def round_up_to_next_block(seq_len: int, block_size: int) -> int: return (seq_len + block_size - 1) // block_size @@ -50,6 +58,21 @@ def create_execute_model_data( ) +def mock_worker(cls=None, + vocab_size: int = 30_000, + max_model_len: int = 2048, + rank: int = 0) -> MagicMock: + if cls is None: + cls = Worker + + worker = MagicMock(spec=cls) + worker.vocab_size = vocab_size + worker.max_model_len = max_model_len + worker.rank = rank + worker.device = 'cuda:0' + return worker + + def patch_execute_model_with_seeds(worker: Worker, rand_seeds: List[int]): seed_iter = iter(rand_seeds) original_execute_model = worker.execute_model @@ -117,25 +140,12 @@ def create_seq_group_metadata_from_prompts( block_size: int, final_seq_lens: List[int], continuations: Optional[List[List[int]]] = None, - num_tokens_processed: Optional[List[int]] = None, seq_ids: Optional[List[int]] = None, ) -> List[SequenceGroupMetadata]: if continuations is None: continuations = [[] for _ in prompts] - if num_tokens_processed is None: - # Default to 1 token missing from kv cache for generation sequences. - num_tokens_processed = [] - for continuation, prompt in zip(continuations, prompts): - # If prefill, then default to zero tokens processed. - if not continuation: - num_tokens_processed.append(0) - else: - # If generation, then default to all but one tokens processed. 
- num_tokens_processed.append( - len(continuation) + len(prompt) - 1) - if seq_ids is None: seq_ids = list(i for i, _ in enumerate(prompts)) @@ -155,13 +165,15 @@ def create_seq_group_metadata_from_prompts( is_prompt=len(cont_token_ids) == 0, seq_data={ i: - SequenceData(prompt_token_ids=prompt_token_ids[:] + - cont_token_ids[:]) + SequenceData( + prompt_token_ids=prompt_token_ids[:], + output_token_ids=cont_token_ids[:], + ), }, sampling_params=SamplingParams(temperature=0.0, ), block_tables={i: block_allocations[i][:]}, - ) for i, (prompt_token_ids, cont_token_ids, num_tokens_saved) in - enumerate(zip(prompts, continuations, num_tokens_processed)) + ) for i, (prompt_token_ids, + cont_token_ids) in enumerate(zip(prompts, continuations)) ] @@ -178,3 +190,68 @@ def assert_logprobs_dict_allclose( expected = torch.tensor( single_step_expected_logprobs[token_id].logprob) assert torch.allclose(actual, expected) + + +def create_sampler_output_list( + token_ids: torch.Tensor, + probs: Iterable[Optional[torch.Tensor]], + seq_ids: Optional[List[int]] = None) -> List[SamplerOutput]: + num_steps, batch_size = token_ids.shape + token_ids_by_step = token_ids.tolist() + + if seq_ids is None: + seq_ids = list(range(batch_size)) + + return [ + SamplerOutput(outputs=[ + SequenceGroupOutput( + samples=[ + SequenceOutput( + output_token=token_id, + parent_seq_id=seq_ids[seq_index], + logprobs={token_id: 0}, + ) + ], + prompt_logprobs=None, + ) for seq_index, token_id in enumerate(token_ids_by_step[step]) + ], + sampled_token_probs=probs[step], + sampled_token_ids=token_ids[step]) + for step in range(num_steps) + ] + + +def create_batch(batch_size, + k, + prompt_len: Union[int, List[int]] = 10, + prev_output_token_len: int = 10, + seq_ids: Optional[List[int]] = None, + num_gpu_blocks: Optional[int] = None, + block_size: Optional[int] = None): + if block_size is None: + block_size = 8 + + if num_gpu_blocks is None: + num_gpu_blocks = 2048 // block_size + + iterator = count() + + if isinstance(prompt_len, int): + prompt_lens = [prompt_len for _ in range(batch_size)] + else: + prompt_lens = prompt_len + + prompts = [[next(iterator) for _ in range(p_len)] for p_len in prompt_lens] + prev_output_tokens = [[ + next(iterator) for _ in range(prev_output_token_len) + ] for _ in range(batch_size)] + final_seq_lens = [ + len(prompt) + len(prev_output_token) + k + 1 + for prompt, prev_output_token in zip(prompts, prev_output_tokens) + ] + + execute_model_data = create_execute_model_data( + create_seq_group_metadata_from_prompts(prompts, num_gpu_blocks, + block_size, final_seq_lens, + prev_output_tokens, seq_ids), ) + return execute_model_data, prompts, prev_output_tokens diff --git a/tests/test_sequence.py b/tests/test_sequence.py new file mode 100644 index 0000000000000..e18df059d770f --- /dev/null +++ b/tests/test_sequence.py @@ -0,0 +1,50 @@ +import pytest + +from vllm.sequence import SequenceGroupOutput, SamplerOutput, SequenceOutput + + +@pytest.fixture +def sample_outputs(): + return [ + SequenceGroupOutput(samples=[ + SequenceOutput(parent_seq_id=0, output_token=i, logprobs={}) + ], + prompt_logprobs=None) for i in range(5) + ] + + +@pytest.fixture +def sampler_output(sample_outputs): + return SamplerOutput(outputs=sample_outputs) + + +def test_sampler_output_initialization(sampler_output, sample_outputs): + assert len(sampler_output) == len(sample_outputs) + assert sampler_output.sampled_token_probs is None + assert sampler_output.sampled_token_ids is None + assert sampler_output.spec_decode_worker_metrics is 
None + + +def test_sampler_output_getitem(sampler_output, sample_outputs): + assert sampler_output[2] == sample_outputs[2] + + +def test_sampler_output_setitem(sampler_output): + new_output = SequenceGroupOutput(samples=[ + SequenceOutput(parent_seq_id=0, output_token=99, logprobs={}) + ], + prompt_logprobs=None) + sampler_output[2] = new_output + assert sampler_output[2] == new_output + + +def test_sampler_output_len(sampler_output, sample_outputs): + assert len(sampler_output) == len(sample_outputs) + + +def test_sampler_output_eq(sample_outputs): + sampler_output1 = SamplerOutput(outputs=sample_outputs) + sampler_output2 = SamplerOutput(outputs=sample_outputs.copy()) + sampler_output3 = SamplerOutput(outputs=sample_outputs[:-1]) + assert sampler_output1 == sampler_output2 + assert sampler_output1 != sampler_output3 diff --git a/vllm/model_executor/layers/rejection_sampler.py b/vllm/model_executor/layers/rejection_sampler.py index 3e1cfc783b8ef..5643454060251 100644 --- a/vllm/model_executor/layers/rejection_sampler.py +++ b/vllm/model_executor/layers/rejection_sampler.py @@ -21,8 +21,6 @@ def __init__(self, strict_mode: bool = False): nontrivial latency. """ super().__init__() - self.probs_dtype = torch.float32 - self.token_id_dtype = torch.int64 self._strict_mode = strict_mode # NOTE: A "bonus token" is accepted iff all proposal tokens are @@ -44,6 +42,14 @@ def init_gpu_tensors(self, rank: int) -> None: dtype=torch.long, device=device) + @property + def probs_dtype(self): + return torch.float32 + + @property + def token_id_dtype(self): + return torch.int64 + def forward( self, target_probs: torch.Tensor, diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 320cb443524ca..19e7f630c4620 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -587,4 +587,4 @@ def _build_sampler_output( SequenceOutput(seq_ids[parent_id], next_token_id, logprobs)) sampler_output.append( SequenceGroupOutput(seq_outputs, group_prompt_logprobs)) - return sampler_output + return SamplerOutput(outputs=sampler_output) diff --git a/vllm/sequence.py b/vllm/sequence.py index fee96a875dde5..37c102407a5f2 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -2,12 +2,16 @@ import copy import enum from dataclasses import dataclass -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, TYPE_CHECKING from vllm.block import LogicalTokenBlock from vllm.sampling_params import SamplingParams from vllm.lora.request import LoRARequest +if TYPE_CHECKING: + import torch + from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics + @dataclass class Logprob: @@ -81,6 +85,8 @@ class SequenceData: Args: prompt_token_ids: The token IDs of the prompt. + output_token_ids: The token IDs of the output. Set to an empty list if + None. Attributes: prompt_token_ids: The token IDs of the prompt. 
@@ -91,9 +97,13 @@ class SequenceData: def __init__( self, prompt_token_ids: List[int], + output_token_ids: Optional[List[int]] = None, ) -> None: + if output_token_ids is None: + output_token_ids = [] + self.prompt_token_ids = prompt_token_ids - self.output_token_ids: List[int] = [] + self.output_token_ids = output_token_ids self.cumulative_logprob = 0.0 def append_token_id(self, token_id: int, logprob: float) -> None: @@ -117,6 +127,12 @@ def get_last_token_id(self) -> int: return self.prompt_token_ids[-1] return self.output_token_ids[-1] + def get_prompt_token_ids(self) -> int: + return self.prompt_token_ids + + def get_output_token_ids(self) -> int: + return self.output_token_ids + def __repr__(self) -> str: return (f"SequenceData(" f"prompt_token_ids={self.prompt_token_ids}, " @@ -506,6 +522,35 @@ def __eq__(self, other: object) -> bool: and self.prompt_logprobs == other.prompt_logprobs) -# For each sequence group, we generate a list of SequenceOutput object, -# each of which contains one possible candidate for the next token. -SamplerOutput = List[SequenceGroupOutput] +@dataclass +class SamplerOutput: + """For each sequence group, we generate a list of SequenceOutput object, + each of which contains one possible candidate for the next token. + + This datastructure implements methods so it can be used like a list, but + also has optional fields for device tensors. + """ + + outputs: List[SequenceGroupOutput] + + # On-device tensor containing probabilities of each token. + sampled_token_probs: Optional["torch.Tensor"] = None + + # On-device tensor containing the sampled token ids. + sampled_token_ids: Optional["torch.Tensor"] = None + + # Spec decode metrics populated by workers. + spec_decode_worker_metrics: Optional["SpecDecodeWorkerMetrics"] = None + + def __getitem__(self, idx: int): + return self.outputs[idx] + + def __setitem__(self, idx: int, value): + self.outputs[idx] = value + + def __len__(self): + return len(self.outputs) + + def __eq__(self, other: object): + return isinstance(other, + self.__class__) and self.outputs == other.outputs diff --git a/vllm/spec_decode/batch_expansion.py b/vllm/spec_decode/batch_expansion.py new file mode 100644 index 0000000000000..478c950f52873 --- /dev/null +++ b/vllm/spec_decode/batch_expansion.py @@ -0,0 +1,351 @@ +from typing import Iterator, List, Tuple, Optional, Dict +from itertools import chain, count + +import torch + +from vllm.sequence import (SamplerOutput, SequenceGroupMetadata, SequenceData) +from vllm.worker.worker import Worker +from vllm.spec_decode.util import nvtx_range, sampler_output_to_torch, get_all_seq_ids, split_batch_by_proposal_len +from vllm.spec_decode.interfaces import SpeculativeScorer, SpeculativeProposals, SpeculativeScores + +SeqId = int +TargetSeqId = int +TokenId = int + + +class BatchExpansionTop1Scorer(SpeculativeScorer): + """Implements a speculative scorer that uses batch expansion to get + probabilities of speculative tokens according to the scoring model. + + Batch expansion converts a list of sequences and multiple query positions + to a new batch of sequences, each with a single query position. This allows + for MQA-like scoring in speculative decoding without requiring an MQA + kernel. + + It is strictly less efficient than MQA scoring. + + It only supports scoring the top1 proposal tokens of the proposer, instead + of topk/tree. 
+ """ + + def __init__(self, scorer_worker: Worker, device: str, vocab_size: int): + self._scorer_worker = scorer_worker + self._device = device + self._vocab_size = vocab_size + + @nvtx_range("BatchExpansionTop1Scorer.score_proposals") + def score_proposals( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Optional[Dict[int, int]], + blocks_to_swap_out: Optional[Dict[int, int]], + blocks_to_copy: Optional[Dict[int, List[int]]], + k: int, + proposals: SpeculativeProposals, + ) -> SpeculativeScores: + """Score the proposed tokens via the scorer model. + + This converts each input sequence to a set of k+1 target sequences. The + target sequences have the unique continuations to be scored and a + unique sequence ID that is different from all input sequence ids. + + If a speculative sequence length would exceed the max model length, then + no speculation is produced for that sequence. + + Args: + seq_group_metadata_list: The input sequence group metadata. + blocks_to_swap_in: This is passed to the worker during scoring. + blocks_to_swap_out: This is passed to the worker during scoring. + blocks_to_copy: This is passed to the worker during scoring. + k: The fixed proposal length. + proposals: The speculative proposals to score. + Returns: + SpeculativeScores: The scores of each speculative token, along with + which sequences were ignored during scoring. + """ + + # TODO(cade) perform this on GPU to remove blocking call. + proposal_lens_list = proposals.proposal_lens.tolist() + proposal_token_ids_list = proposals.proposal_token_ids.tolist() + + spec_indices, non_spec_indices, target_seq_group_metadata_list, num_scoring_tokens = self._expand_batch( + seq_group_metadata_list=seq_group_metadata_list, + proposal_token_ids_list=proposal_token_ids_list, + proposal_lens_list=proposal_lens_list, + ) + + target_sampler_output = self._scorer_worker.execute_model( + seq_group_metadata_list=target_seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + return_python_output=False) + + all_tokens, all_probs = self._contract_batch( + original_bs=len(seq_group_metadata_list), + target_sampler_output=target_sampler_output, + proposals=proposals, + num_scoring_tokens=num_scoring_tokens, + non_spec_indices=non_spec_indices, + spec_indices=spec_indices, + k=k, + ) + + return SpeculativeScores( + probs=all_probs, + token_ids=all_tokens, + ) + + def _expand_batch( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + proposal_token_ids_list: List[TokenId], + proposal_lens_list: List[int], + ) -> Tuple[List[int], List[int], List[SequenceGroupMetadata], int]: + """Given the input sequences and potentially multiple corresponding + proposal tokens, create a new batch where each sequence has a single + query token. + """ + + # vLLM currently only supports proposal lens equal to zero or the batch + # proposal len. This adds some complexity (splitting the batch into spec + # and non spec sequences) and should be removed in the future. It can be + # done by supporting per-sequence proposal lens. 
+ spec_seqs, spec_indices = split_batch_by_proposal_len( + seq_group_metadata_list, + proposal_lens_list, + select_proposal_len_zero=False) + non_spec_seqs, non_spec_indices = split_batch_by_proposal_len( + seq_group_metadata_list, + proposal_lens_list, + select_proposal_len_zero=True) + + target_seq_group_metadata_list = self._create_scoring_model_input( + spec_seqs, proposal_token_ids_list) + num_scoring_tokens = len(target_seq_group_metadata_list) + target_seq_group_metadata_list.extend(non_spec_seqs) + + return spec_indices, non_spec_indices, target_seq_group_metadata_list, num_scoring_tokens + + def _contract_batch(self, original_bs: int, + target_sampler_output: List[SamplerOutput], + proposals: SpeculativeProposals, + num_scoring_tokens: int, non_spec_indices: List[int], + spec_indices: List[int], + k: int) -> Tuple[torch.Tensor, torch.Tensor]: + """Contract the expanded batch back into its original size. + This maps the scores of speculative tokens back to their original + sequences. + """ + (target_token_ids, target_probs, non_spec_target_token_ids, + non_spec_target_probs) = self._split_scoring_output( + target_sampler_output, num_scoring_tokens) + + # Map distinct sequences used to score each token + # of shape [batch_size * k + 1] back to [batch_size, k + 1]. + batch_size, k = proposals.proposal_token_ids.shape + + target_token_ids = target_token_ids.squeeze().reshape( + batch_size, k + 1) + target_probs = target_probs.squeeze().reshape(batch_size, k + 1, + self._vocab_size) + + all_tokens = torch.full(size=(original_bs, k + 1), + fill_value=-1, + device=self._device, + dtype=torch.long) + all_probs = torch.zeros(original_bs, + k + 1, + self._vocab_size, + device=self._device, + dtype=torch.float32) + + if non_spec_indices: + all_tokens[non_spec_indices, 0] = non_spec_target_token_ids + all_probs[non_spec_indices, :1, :] = non_spec_target_probs + + if spec_indices: + all_tokens[spec_indices] = target_token_ids + all_probs[spec_indices] = target_probs + + return all_tokens, all_probs + + def _create_scoring_model_input( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + proposal_token_ids: List[List[TokenId]], # shape: [batch_size, k] + ) -> List[SequenceGroupMetadata]: + """Given the original input sequences and proposed tokens from the draft + model, create a list of target sequences that can be used for scoring. + """ + + if not seq_group_metadata_list: + return [] + + target_seq_ids_iter = self._create_target_seq_id_iterator( + get_all_seq_ids(seq_group_metadata_list)) + + target_seq_group_metadata = list( + chain.from_iterable( + self._create_target_seq_group_metadata( + seq_group_metadata, + proposal_token_ids, + i, + target_seq_ids_iter, + ) for i, seq_group_metadata in enumerate( + seq_group_metadata_list))) + + return target_seq_group_metadata + + def _create_target_seq_group_metadata( + self, + input_seq_group_metadata: SequenceGroupMetadata, + proposal_token_ids: List[TokenId], # shape: [batch_size, k] + batch_index: int, + target_seq_ids_iter: Iterator[TargetSeqId], + ) -> List[SequenceGroupMetadata]: + """Given an input sequence group metadata and a list of draft tokens, + create a list of target SequenceGroupMetadata, one for each + token id that needs to be scored. + + Naive speculative decoding requires K target model scores, one for each + draft model token. However one can add a bonus token such that if each + token is accepted, then a final token may be sampled from the model. 
+ This function creates K+1 target SequenceGroupMetadata to take + advantage of the bonus token. + """ + assert not input_seq_group_metadata.is_prompt, ( + "Speculating on " + "prompts not yet supported") + assert len(input_seq_group_metadata.seq_data) == 1, ( + "Beam search " + "not supported in speculative decoding") + input_seq_id = next(iter(input_seq_group_metadata.seq_data.keys())) + + token_ids_to_score = self._get_token_ids_to_score( + proposal_token_ids[batch_index]) + + target_seq_group_metadata_list: List[SequenceGroupMetadata] = [] + for token_ids in token_ids_to_score: + target_seq_group_metadata_list.append( + self._create_single_target_seq_group_metadata( + input_seq_group_metadata, + input_seq_id, + next(target_seq_ids_iter), + token_ids, + )) + + return target_seq_group_metadata_list + + def _create_single_target_seq_group_metadata( + self, + seq_group_metadata: SequenceGroupMetadata, + seq_id: SeqId, + target_seq_id: TargetSeqId, + token_ids: List[TokenId], + ) -> SequenceGroupMetadata: + """Create a single target SequenceGroupMetadata. + + Args: + seq_group_metadata: The metadata for the input sequence. + seq_id: The input sequence ID. + target_seq_id: The corresponding target sequence ID. + token_ids: The list of token ids that are to be appended to the + input sequence. + """ + seq_data = seq_group_metadata.seq_data[seq_id] + prompt_token_ids = seq_data.get_prompt_token_ids() + new_output_token_ids = [*seq_data.get_output_token_ids(), *token_ids] + + return SequenceGroupMetadata( + request_id=seq_group_metadata.request_id, + is_prompt=seq_group_metadata.is_prompt, + seq_data={ + target_seq_id: + SequenceData( + prompt_token_ids=prompt_token_ids, + output_token_ids=new_output_token_ids, + ), + }, + sampling_params=seq_group_metadata.sampling_params, + block_tables={ + target_seq_id: seq_group_metadata.block_tables[seq_id], + }, + lora_request=None, + ) + + def _split_scoring_output( + self, sampler_output: SamplerOutput, num_scoring_tokens: int + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Split the target model output into speculative and non-speculative + output. + """ + + # vLLM currently only supports proposal lens equal to zero or the batch + # proposal len. This adds some complexity (splitting the batch into spec + # and non spec sequences) and should be removed in the future. It can be + # done by supporting per-sequence proposal lens. + # + # First samples are from speculative scoring, latter samples are non- + # speculative samples. + split_sizes = [ + num_scoring_tokens, + sampler_output.sampled_token_ids.numel() - num_scoring_tokens + ] + (spec_probs, non_spec_probs + ) = sampler_output.sampled_token_probs.split(split_sizes) + (spec_sampled_tokens, non_spec_sampled_tokens + ) = sampler_output.sampled_token_ids.flatten().split(split_sizes) + + # Convert scores to tensors. + sampler_output.sampled_token_probs = spec_probs + sampler_output.sampled_token_ids = spec_sampled_tokens + target_token_ids, target_probs = sampler_output_to_torch( + [sampler_output]) + + # Convert non-speculative output tokens to tensors. 
+ sampler_output.sampled_token_probs = non_spec_probs + sampler_output.sampled_token_ids = non_spec_sampled_tokens + non_spec_target_token_ids, non_spec_target_probs = sampler_output_to_torch( + [sampler_output]) + + return target_token_ids, target_probs, non_spec_target_token_ids, non_spec_target_probs + + def _create_target_seq_id_iterator( + self, seq_ids: List[SeqId]) -> Iterator[TargetSeqId]: + """Create an iterator for creating target sequence ids. + Target sequence ids are distinct from sequence ids because we create a + distinct target sequence id for each proposal token to be scored. + + This implementation increments a counter starting at 1 + max of all + provided input sequence ids. + """ + return count(start=max(seq_ids) + 1) + + def _get_token_ids_to_score( + self, + full_spec_token_ids: List[TokenId] # shape: [k] + ) -> List[List[TokenId]]: + """Given an int tensor of proposal token ids, return a list of + token ids that should be scored. + + Returns k+1 output lists. The additional one is used for generating the + bonus token. + + Example: + Input: [0, 1, 2, 3] (k=4) + Output: (k+1 lists) + [] + [0] + [0, 1] + [0, 1, 2] + [0, 1, 2, 3] + """ + empty_token_ids = [] + + token_ids_to_score = [empty_token_ids] + token_ids_to_score.extend([ + full_spec_token_ids[:i + 1] + for i in range(len(full_spec_token_ids)) + ]) + return token_ids_to_score diff --git a/vllm/spec_decode/interfaces.py b/vllm/spec_decode/interfaces.py new file mode 100644 index 0000000000000..9e53ffb60ac32 --- /dev/null +++ b/vllm/spec_decode/interfaces.py @@ -0,0 +1,77 @@ +from typing import List, Tuple, Optional, Dict +from dataclasses import dataclass +from abc import ABC, abstractmethod + +import torch + +from vllm.sequence import SequenceGroupMetadata + + +@dataclass +class SpeculativeProposals: + """Datastructure used to represent proposal tokens from some proposer. It + also tracks how many speculative tokens each sequence has. + """ + + # Speculative proposal tokens. + proposal_token_ids: torch.Tensor + + # Probabilities of the proposal tokens according to the proposer. + proposal_probs: torch.Tensor + + # The valid length of each proposal; can be zero. + proposal_lens: torch.Tensor + + def __repr__(self): + return (f"SpeculativeProposals(" + f"proposal_token_ids={self.proposal_token_ids.shape}, " + f"proposal_probs={self.proposal_probs.shape}, " + f"proposal_lens={self.proposal_lens.shape})") + + +@dataclass +class SpeculativeScores: + """Datastructure used to represent the scores of speculative tokens + according to the scoring model. + """ + + # Probabilities of the speculative tokens according to the scoring model. + probs: torch.Tensor + + # Token ids sampled from the scoring model. Used for speculative bonus + # tokens and also non-speculative normal decoding. 
+ token_ids: torch.Tensor + + def __repr__(self): + return (f"SpeculativeScores(" + f"probs={self.probs.shape}, " + f"token_ids={self.token_ids.shape})") + + +class SpeculativeProposer(ABC): + + @abstractmethod + def get_proposals( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + max_proposal_len: int, + ) -> SpeculativeProposals: + raise NotImplementedError + + +class SpeculativeScorer(ABC): + + @abstractmethod + def score_proposals( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Optional[Dict[int, int]], + blocks_to_swap_out: Optional[Dict[int, int]], + blocks_to_copy: Optional[Dict[int, List[int]]], + k: int, + proposals: SpeculativeProposals, + ) -> Tuple[torch.Tensor, torch.Tensor]: + raise NotImplementedError diff --git a/vllm/spec_decode/metrics.py b/vllm/spec_decode/metrics.py new file mode 100644 index 0000000000000..65a2a4a63a98f --- /dev/null +++ b/vllm/spec_decode/metrics.py @@ -0,0 +1,174 @@ +import torch +from dataclasses import dataclass +from vllm.model_executor.layers.rejection_sampler import RejectionSampler +from typing import Optional +from vllm.utils import in_wsl +import time +from typing import Callable + + +@dataclass +class SpecDecodeWorkerMetrics: + """Dataclass holding metrics emitted from the spec decode worker. + """ + + # The empirical acceptance rate of the proposal method on a per-token basis. + # This is useful for evaluating how well the proposal method aligns with the + # scoring method. + draft_acceptance_rate: float + + # The empirical efficiency, measured as the number of tokens emitted by the + # system divided by the number of tokens that could be emitted by the system + # if the proposal method were perfect. + system_efficiency: float + + # The number of speculative tokens produced by the proposal method. + draft_tokens: int + + # The number of tokens emitted by the entire system. + emitted_tokens: int + + # The number of tokens accepted by the scoring model and verification + # routine, e.g. Llama2-70B and lossless rejection sampling. + # + # NOTE: Any token accepted by the verification routine is considered + # accepted (regardless of if the speculative prefix is also accepted). The + # user will usually see less accepted tokens. This metric is helpful when + # evaluating alignment of the proposal method with the scoring model. + accepted_tokens: int + + # The number of speculative tokens per sequence. + num_spec_tokens: int + + +Timer = Callable[[], float] + + +class AsyncMetricsCollector: + """Class which copies rejection sampler metrics from the device to CPU on a + non-default Torch stream. + """ + + def __init__(self, + rejection_sampler: RejectionSampler, + timer: Optional[Timer] = None, + collect_interval_s: float = 5.0): + self._rejection_sampler = rejection_sampler + self._timer = time.time if timer is None else timer + + self._rank: Optional[int] = None + + # We don't have a device set yet. 
+ self._copy_stream: Optional[torch.cuda.Stream] = None + + self._in_flight_copy: Optional[torch.cuda.Event] = None + + pin_memory = not in_wsl() + self._aggregate_num_accepted_tokens = torch.tensor( + 0, dtype=torch.long, device="cpu", pin_memory=pin_memory) + self._aggregate_num_emitted_tokens = torch.tensor( + 0, dtype=torch.long, device="cpu", pin_memory=pin_memory) + self._aggregate_num_draft_tokens = 0 + + self._rejsample_metrics_collect_interval_s = collect_interval_s + self._last_metrics_collect_time = self._timer() + + def init_gpu_tensors(self, rank: int) -> None: + self._rank = rank + self._copy_stream = torch.cuda.Stream() + + def maybe_collect_rejsample_metrics( + self, k: int) -> Optional[SpecDecodeWorkerMetrics]: + + # If a copy was initiated in the previous call, collect and return. + if self._in_flight_copy is not None: + ready_event = self._in_flight_copy + self._in_flight_copy = None + return self._collect_rejsample_metrics(k, ready_event) + + # Otherwise, check if we should start a new copy. + if self._should_collect_rejsample_metrics(self._timer()): + assert self._in_flight_copy is None + self._in_flight_copy = self._copy_rejsample_metrics_async() + + return None + + def _should_collect_rejsample_metrics(self, now: float) -> bool: + """Return whether or not this iteration should print rejection sampling + metrics. + """ + if self._rank != 0: + return False + + if (now - self._last_metrics_collect_time < + self._rejsample_metrics_collect_interval_s): + return False + return True + + def _copy_rejsample_metrics_async(self) -> torch.cuda.Event: + """Copy rejection sampling metrics (number of accepted tokens, etc) to + CPU asynchronously. + + Returns a CUDA event recording when the copy is complete. + """ + self._copy_stream.wait_stream(torch.cuda.current_stream()) + + with torch.cuda.stream(self._copy_stream): + self._aggregate_num_accepted_tokens.copy_( + self._rejection_sampler.num_accepted_tokens, non_blocking=True) + self._aggregate_num_emitted_tokens.copy_( + self._rejection_sampler.num_emitted_tokens, non_blocking=True) + # Number of draft tokens is calculated on CPU, so no copy is + # required. + self._aggregate_num_draft_tokens = ( + self._rejection_sampler.num_draft_tokens) + + aggregate_metrics_ready = torch.cuda.Event() + aggregate_metrics_ready.record(self._copy_stream) + + return aggregate_metrics_ready + + def _collect_rejsample_metrics( + self, k: int, + ready_event: torch.cuda.Event) -> SpecDecodeWorkerMetrics: + """Create metrics object from statistics copied asynchronously. + + Args: + k: int. The number of speculative tokens; used to determine system + efficiency. + ready_event: torch.cuda.Event. The CUDA event recording when the + async GPU->CPU copy is complete. 
+ """ + + ready_event.synchronize() + accepted_tokens = self._aggregate_num_accepted_tokens.item() + emitted_tokens = self._aggregate_num_emitted_tokens.item() + draft_tokens = self._aggregate_num_draft_tokens + + num_possible_tokens = self.get_max_num_accepted_tokens(draft_tokens, k) + + if draft_tokens > 0: + draft_acceptance_rate = accepted_tokens / draft_tokens + else: + draft_acceptance_rate = float("nan") + + if num_possible_tokens > 0: + system_efficiency = emitted_tokens / num_possible_tokens + else: + system_efficiency = float("nan") + + return SpecDecodeWorkerMetrics( + num_spec_tokens=k, + draft_acceptance_rate=draft_acceptance_rate, + system_efficiency=system_efficiency, + accepted_tokens=accepted_tokens, + draft_tokens=draft_tokens, + emitted_tokens=emitted_tokens, + ) + + @staticmethod + def get_max_num_accepted_tokens(draft_tokens: int, k: int) -> int: + # Divide by k since batch size can be variable. + total_num_spec_seqs = draft_tokens / k + num_accepted_per_seq_if_all_accepted = k + 1 + return int(total_num_spec_seqs / num_accepted_per_seq_if_all_accepted) diff --git a/vllm/spec_decode/multi_step_worker.py b/vllm/spec_decode/multi_step_worker.py new file mode 100644 index 0000000000000..f7be14d3d22c2 --- /dev/null +++ b/vllm/spec_decode/multi_step_worker.py @@ -0,0 +1,366 @@ +from typing import List, Dict, Optional, Tuple +import copy + +import torch + +from vllm.sequence import SamplerOutput, SequenceGroupMetadata +from vllm.worker.worker import Worker +from vllm.spec_decode.interfaces import SpeculativeProposals, SpeculativeProposer +from vllm.spec_decode.util import sampler_output_to_torch + + +class MultiStepWorker(Worker): + """The MultiStepWorker is equivalent to a Worker except that it allows + multiple forward passes in a single call, assuming the scheduler has + allocated enough space to store the additional KV. This reduces overhead + by invoking the scheduler less. + + The MultiStepWorker does not support cache swap operations, or beam search. + Cache swap operations do not require large modifications. On the other hand, + beam search requires memory allocations during sequence forks and thus + requires more thought for MultiStepWorker support. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._proposer: Optional[DraftModelTop1Proposer] = None + + def init_model(self): + super().init_model() + + self._proposer = DraftModelTop1Proposer( + self, + self.device, + self.max_model_len, + self.vocab_size, + ) + + @torch.inference_mode() + def execute_model_multi_step( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + num_steps: int, + ) -> List[SamplerOutput]: + """Run the model forward pass num_steps times. Returns the list of + sampler output, one per model forward pass. + """ + self._raise_if_unsupported(seq_group_metadata_list, blocks_to_swap_in, + blocks_to_swap_out, blocks_to_copy) + + # Shallow copy input data so modifications (such as appending tokens) + # do not cause side-effects. + copied_seq_group_metadata_list = self._shallow_copy_inputs( + seq_group_metadata_list) + + # Assert enough KV space for num_steps tokens per sequence. + self._assert_enough_kv_space(seq_group_metadata_list, num_steps) + + # Run model num_steps times. 
+ model_outputs = [] + for _ in range(num_steps): + model_output = super().execute_model( + seq_group_metadata_list=copied_seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + ) + + self._append_new_tokens(model_output, + copied_seq_group_metadata_list) + model_outputs.append(model_output) + + return model_outputs + + def get_spec_proposals( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + max_proposal_len: int, + ) -> SpeculativeProposals: + """Produce speculations given an input batch of sequences. The number of + speculative tokens per sequence is determined by max_proposal_len. + """ + + return self._proposer.get_proposals( + seq_group_metadata_list, + blocks_to_swap_in, + blocks_to_swap_out, + blocks_to_copy, + max_proposal_len, + ) + + def _append_new_tokens( + self, model_output: SamplerOutput, + seq_group_metadata_list: SequenceGroupMetadata) -> None: + """Given model output from a single run, append the tokens to the + sequences. This is normally done outside of the worker, but it is + required if the worker is to perform multiple forward passes. + """ + for seq_group_metadata, sequence_group_outputs in zip( + seq_group_metadata_list, model_output): + seq_group_metadata.is_prompt = False + + for seq_output in sequence_group_outputs.samples: + # NOTE: Beam search is not supported, so we can assume that + # parent_seq_id == seq_id. + seq = seq_group_metadata.seq_data[seq_output.parent_seq_id] + + token_id = seq_output.output_token + token_logprob = seq_output.logprobs[token_id] + + seq.append_token_id(token_id, token_logprob.logprob) + + def _shallow_copy_inputs( + self, seq_group_metadata_list: List[SequenceGroupMetadata] + ) -> List[SequenceGroupMetadata]: + """Copy input data structures to remove side-effects when input data + structures are shared with other modules. + + Helpful when the vLLM scheduler runs in the same process as the worker. + The alternative is deep-copying (or other form of deep copy); this has + performance downsides. + """ + + # Shallow-copy the list of SequenceGroupMetadata. This allows us to + # append tokens and change is_prompt without external side-effects. + new_seq_group_metadata_list = [] + + for old_seq_group_metadata in seq_group_metadata_list: + # We must shallow-copy seq_group_metadata as is_prompt could change. + seq_group_metadata = copy.copy(old_seq_group_metadata) + new_seq_group_metadata_list.append(seq_group_metadata) + + # We must shallow-copy seq_data as we will append token ids + new_seq_data = {} + for seq_id, old_seq_data in seq_group_metadata.seq_data.items(): + new_seq_data[seq_id] = copy.copy(old_seq_data) + new_seq_data[ + seq_id].output_token_ids = old_seq_data.output_token_ids[:] + + seq_group_metadata.seq_data = new_seq_data + + return new_seq_group_metadata_list + + def _assert_enough_kv_space( + self, seq_group_metadata_list: List[SequenceGroupMetadata], + num_steps: int) -> None: + """Assert there are enough physical blocks per sequence to store the + current KV plus additional KV from num_steps tokens. + """ + assert self.model_runner.block_size is not None + for seq_group_metadata in seq_group_metadata_list: + # Only one seq_id is guaranteed because there is no beam search. 
+ seq_id = list(seq_group_metadata.seq_data.keys())[0] + seq = seq_group_metadata.seq_data[seq_id] + + # After num_steps, the seq len will be the current seq len + # plus one token per step. + final_seq_len = seq.get_len() + num_steps + + # We will have final_seq_len - 1 KV because vLLM saves KV for a + # token in the iteration after the token was generated. + required_num_kv_slots = final_seq_len - 1 + + # The allocated number of kv slots is the number of allocated blocks + # times the number of slots of block. + number_physical_blocks = len( + seq_group_metadata.block_tables[seq_id]) + allocated_kv_slots = (number_physical_blocks * + self.model_runner.block_size) + + if required_num_kv_slots > allocated_kv_slots: + request_id = seq_group_metadata.request_id + raise ValueError( + "The worker attempted to run " + f"{num_steps} times but found insufficient KV space for " + f"{request_id=} {seq_id=}. ({allocated_kv_slots=} " + f"{required_num_kv_slots=}).") + + def _raise_if_unsupported( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + ) -> None: + """MultiStepWorker does not yet implement support for cache swap + operations or beam search. + """ + if any([blocks_to_swap_in, blocks_to_swap_out, blocks_to_copy]): + raise NotImplementedError( + "MultiStepWorker does not support cache operations") + + if any( + len(seq_group_metadata.seq_data.keys()) != 1 + for seq_group_metadata in seq_group_metadata_list): + raise NotImplementedError( + "MultiStepWorker does not support beam search.") + + +class DraftModelTop1Proposer(SpeculativeProposer): + """Helper class which separates out sequences which would exceed the max + model length when speculated upon. + + This allows combinations of models such as JackFram/llama-68m draft with + meta-llama/Llama2-13b-chat-hf, as llama-68m has max_position_embeddings of + 2048 while Llama2-13b has max_position_embeddings of 4096. + + We treat the sequences which exceed the proposal draft model length as + "non-spec sequences". Essentially they skip the draft model and go through + normal decoding in the target model. + + Currently, only proposal_lens of 0 and k are supported, where k is a global + batch proposal length. In the future vLLM should support per-sequence + proposal lengths. + """ + + def __init__( + self, + draft_worker: MultiStepWorker, + device: str, + max_model_len: int, + vocab_size: int, + ): + self._draft_worker = draft_worker + self._device = device + self._max_model_len = max_model_len + self._vocab_size = vocab_size + + def get_proposals( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + max_proposal_len: int, + ) -> SpeculativeProposals: + """Get speculative proposals given the input batch. + + Sequences which would exceed the max model length are skipped during + speculation. + """ + + # Split speculative- and non-speculative- sequences. + proposal_lens, nonzero_proposal_len_seqs, nonzero_proposal_len_indices = self._split_by_max_model_len( + seq_group_metadata_list, max_proposal_len) + + if nonzero_proposal_len_seqs: + # Speculate tokens using the draft worker for the speculative + # sequences. 
+ maybe_sampler_output = self._draft_worker.execute_model_multi_step( + seq_group_metadata_list=nonzero_proposal_len_seqs, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + num_steps=max_proposal_len, + ) + else: + # If no sequences can be speculated, set sampler output to None. + maybe_sampler_output = None + + # Combine speculative- and non-speculative sequences into the same + # representation. + proposal_tokens, proposal_probs, proposal_lens = self._merge_outputs( + batch_size=len(seq_group_metadata_list), + max_proposal_len=max_proposal_len, + maybe_sampler_output=maybe_sampler_output, + proposal_lens=proposal_lens, + nonzero_proposal_len_indices=nonzero_proposal_len_indices, + ) + + proposals = SpeculativeProposals( + proposal_token_ids=proposal_tokens, + proposal_probs=proposal_probs, + proposal_lens=proposal_lens, + ) + + return proposals + + def _split_by_max_model_len( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + max_proposal_len: int, + ) -> Tuple[List[int], List[SequenceGroupMetadata], List[int]]: + """Determine which sequences would exceed the max model length. + """ + + proposal_lens: List[int] = [] + nonzero_proposal_len_seqs: List[SequenceGroupMetadata] = [] + nonzero_proposal_len_indices: List[int] = [] + for i, seq_group_metadata in enumerate(seq_group_metadata_list): + seq_data = next(iter(seq_group_metadata.seq_data.values())) + seq_len = seq_data.get_len() + + # Currently only proposal lens of 0 or the global batch proposal len + # are supported. + if seq_len + max_proposal_len < self._max_model_len: + proposal_lens.append(max_proposal_len) + nonzero_proposal_len_seqs.append(seq_group_metadata) + nonzero_proposal_len_indices.append(i) + else: + proposal_lens.append(0) + + return proposal_lens, nonzero_proposal_len_seqs, nonzero_proposal_len_indices + + def _merge_outputs( + self, + batch_size: int, + max_proposal_len: int, + maybe_sampler_output: Optional[SamplerOutput], + proposal_lens: List[int], + nonzero_proposal_len_indices: List[int], + ) -> Tuple[torch.Tensor, torch.tensor, torch.Tensor]: + """After speculations are produced, merge the speculation results with + the skipped sequences. + """ + if maybe_sampler_output is None: + # If no speculative tokens, the sampler output will be None. + # In this case we return empty tensors. + proposal_tokens = torch.zeros(0, + max_proposal_len, + dtype=torch.long, + device=self._device) + proposal_probs = torch.zeros(0, + max_proposal_len, + self._vocab_size, + dtype=torch.float32, + device=self._device) + proposal_lens = torch.zeros(len(proposal_lens), + dtype=torch.long, + device=self._device) + return proposal_tokens, proposal_probs, proposal_lens + + sampler_output = maybe_sampler_output + + proposal_tokens, proposal_probs = sampler_output_to_torch( + sampler_output) + + # Now, reformat the output GPU tensors such that each sequence has + # a proposal. the proposal can be empty, e.g. 
[-1, -1, -1] + + entire_proposal_tokens = torch.full(size=(batch_size, + *proposal_tokens.shape[1:]), + fill_value=-1, + dtype=torch.long, + device=self._device) + entire_proposal_tokens[nonzero_proposal_len_indices] = proposal_tokens + entire_proposal_probs = torch.zeros(batch_size, + *proposal_probs.shape[1:], + dtype=torch.float32, + device=self._device) + entire_proposal_probs[nonzero_proposal_len_indices] = proposal_probs + + proposal_tokens, proposal_probs = entire_proposal_tokens, entire_proposal_probs + + proposal_lens = torch.zeros(batch_size, + dtype=torch.long, + device=self._device) + proposal_lens[nonzero_proposal_len_indices] = max_proposal_len + + return proposal_tokens, proposal_probs, proposal_lens diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py new file mode 100644 index 0000000000000..890e479202372 --- /dev/null +++ b/vllm/spec_decode/spec_decode_worker.py @@ -0,0 +1,372 @@ +from typing import List, Tuple, Optional, Dict +from functools import cached_property + +import torch + +from vllm.spec_decode.metrics import AsyncMetricsCollector +from vllm.sequence import (SamplerOutput, SequenceGroupMetadata, + SequenceGroupOutput, SequenceOutput) +from vllm.worker.worker import Worker +from vllm.spec_decode.multi_step_worker import MultiStepWorker +from vllm.model_executor.layers.rejection_sampler import RejectionSampler +from vllm.config import CacheConfig +from vllm.spec_decode.util import nvtx_range, get_all_seq_ids, split_batch_by_proposal_len +from vllm.spec_decode.interfaces import SpeculativeProposals, SpeculativeScores +from vllm.spec_decode.batch_expansion import BatchExpansionTop1Scorer +from vllm.spec_decode.interfaces import SpeculativeScorer + + +class SpecDecodeWorker: + """Worker which implements speculative decoding. + + Speculative decoding reduces decoding per-token latency by using a proposal + method, such as a small draft model, to speculate ahead of a larger LLM. The + probabilities of the speculative tokens are then determined by the larger + LLM, after which some verification routine determines which (if any) of the + speculative tokens are accepted by the larger LLM. + + See https://github.com/vllm-project/vllm/pull/2188 and + https://github.com/vllm-project/vllm/pull/3103 for more info. + + The current implementation has the following limitations: + * Only draft-model proposal is implemented (contributions for more forms are + welcome!). + * Only top-1 proposal and scoring are implemented. Tree-attention is left as + future work. + * Only lossless rejection sampling is supported. Contributions adding lossy + verification routines are welcome (e.g. Medusa's typical acceptance). + * All sequences in a batch must have the same proposal length, or zero. This + can be improved by having per-sequence speculation in the future. + * The scoring forward pass is done without an MQA kernel, which is + suboptimal especially as the batch size, proposal length, and sequence + lengths grow. Contributions to add a MQA scoring are welcome once + correctness tests pass. + More info here https://docs.google.com/document/d/1T-JaS2T1NRfdP51qzqpyakoCXxSXTtORppiwaj5asxA/edit. + """ + + def __init__( + self, + proposer_worker: MultiStepWorker, + scorer_worker: Worker, + rejection_sampler: RejectionSampler, + metrics_collector: Optional[AsyncMetricsCollector] = None, + ): + """ + Create a SpecDecodeWorker. + + Args: + proposer_worker: A worker that can produce speculative tokens for + sequences. 
+ scorer_worker: A worker that produces probabilities of speculative + tokens according to some base model. Typically a vanilla vLLM + Worker. + rejection_sampler: A Torch module used to perform modified rejection + sampling for speculative decoding. + metrics_collector: Helper class for collecting metrics; can be set + for testing purposes. + """ + self.proposer_worker = proposer_worker + self.scorer_worker = scorer_worker + self.rejection_sampler = rejection_sampler + + self._metrics = AsyncMetricsCollector( + rejection_sampler + ) if metrics_collector is None else metrics_collector + + self.probs_dtype = self.rejection_sampler.probs_dtype + self.token_id_dtype = self.rejection_sampler.token_id_dtype + + self.scorer: SpeculativeScorer = None + + def init_model(self) -> None: + """Initialize both scorer and proposer models. + """ + # The scorer worker model is initialized first in case the proposer + # model has a smaller TP degree than the target worker. + self.scorer_worker.init_model() + self.proposer_worker.init_model() + + self._metrics.init_gpu_tensors(self.rank) + self.rejection_sampler.init_gpu_tensors(self.rank) + self.scorer = BatchExpansionTop1Scorer( + scorer_worker=self.scorer_worker, + device=self.device, + vocab_size=self._vocab_size) + + def profile_num_available_blocks(self, block_size: int, + gpu_memory_utilization: float, + cpu_swap_space: int, + cache_dtype: str) -> Tuple[int, int]: + """Determine the number of cache blocks to use. + + This is done by profiling the scorer model (which is typically the + larger of the two). Then the total memory which would be used by the + scorer cache is divided evenly between the proposer and scorer model KV, + such that the number of blocks is equal in both KV caches. + """ + num_gpu_blocks, num_cpu_blocks = ( + self.scorer_worker.profile_num_available_blocks( + block_size, gpu_memory_utilization, cpu_swap_space, + cache_dtype)) + + scorer_cache_block_size_bytes = self.scorer_worker.get_cache_block_size_bytes( + block_size, cache_dtype) + proposer_cache_block_size_bytes = self.proposer_worker.get_cache_block_size_bytes( + block_size, cache_dtype) + + new_num_gpu_blocks = split_num_cache_blocks_evenly( + scorer_cache_block_size_bytes, proposer_cache_block_size_bytes, + num_gpu_blocks) + return new_num_gpu_blocks, num_cpu_blocks + + def init_cache_engine(self, cache_config: CacheConfig): + """Initialize the cache engine of the scorer and proposer workers. + """ + self.scorer_worker.init_cache_engine(cache_config) + self.proposer_worker.init_cache_engine(cache_config) + + @torch.inference_mode() + def execute_model( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Optional[Dict[int, int]], + blocks_to_swap_out: Optional[Dict[int, int]], + blocks_to_copy: Optional[Dict[int, List[int]]], + num_spec_tokens: int, + ) -> List[SamplerOutput]: + """Perform speculative decoding on the input batch. + """ + + assert seq_group_metadata_list is not None, ( + "speculative decoding " + "requires non-None seq_group_metadata_list") + + # If no spec tokens, call the proposer and scorer workers normally. + # Used for prefill. 
+ if num_spec_tokens == 0 or len(seq_group_metadata_list) == 0: + return self._run_no_spec( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + ) + + return self._run_speculative_decoding_step( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + k=num_spec_tokens, + ) + + @nvtx_range("spec_decode_worker._run_no_spec") + def _run_no_spec( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Optional[Dict[int, int]], + blocks_to_swap_out: Optional[Dict[int, int]], + blocks_to_copy: Optional[Dict[int, List[int]]], + ) -> List[SamplerOutput]: + """Run a prefill step, without any speculation. The input is sent to the + proposer and scorer model so that the KV cache is consistent between the + two. + """ + + self.proposer_worker.execute_model( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + return_python_output=False) + + sampler_output = self.scorer_worker.execute_model( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + ) + + # Clear device tensors from sampler output. This reduces communication + # overhead when the engine runs in a different process than the workers. + sampler_output.probs = None + sampler_output.sampled_tokens = None + return [sampler_output] + + @nvtx_range("spec_decode_worker._run_speculative_decoding_step") + def _run_speculative_decoding_step( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Optional[Dict[int, int]], + blocks_to_swap_out: Optional[Dict[int, int]], + blocks_to_copy: Optional[Dict[int, List[int]]], + k: int, + ) -> List[SamplerOutput]: + """Execute a single step of speculative decoding. + + This invokes the proposer worker to get k speculative tokens for each + sequence, then scores each speculative token using the scoring worker. + + Returns a list of SamplerOutput, each containing a single token per + sequence. + """ + + # Generate proposals using draft worker. + proposals = self.proposer_worker.get_spec_proposals( + seq_group_metadata_list, blocks_to_swap_in, blocks_to_swap_out, + blocks_to_copy, k) + + proposal_scores = self.scorer.score_proposals( + seq_group_metadata_list, + blocks_to_swap_in, + blocks_to_swap_out, + blocks_to_copy, + k, + proposals, + ) + + accepted_token_ids = self._verify_tokens(seq_group_metadata_list, + proposal_scores, proposals, k) + + return self._create_output_sampler_list(seq_group_metadata_list, + accepted_token_ids, k) + + @nvtx_range("spec_decode_worker._verify_tokens") + def _verify_tokens( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + proposal_scores: SpeculativeScores, + proposals: SpeculativeProposals, + max_proposal_len: int, + ) -> torch.Tensor: + """Determine which speculative tokens are accepted using the + probabilities of each token according to the proposer and scorer models. + """ + proposal_lens_list = proposals.proposal_lens.tolist() + + # vLLM currently only supports proposal lens equal to zero or the batch + # proposal len. This adds some complexity (splitting the batch into spec + # and non spec sequences) and should be removed in the future. 
It can be + # done by supporting per-sequence proposal lens. + _, spec_indices = split_batch_by_proposal_len( + seq_group_metadata_list, + proposal_lens_list, + select_proposal_len_zero=False) + _, non_spec_indices = split_batch_by_proposal_len( + seq_group_metadata_list, + proposal_lens_list, + select_proposal_len_zero=True) + original_indices = spec_indices + non_spec_indices + + proposal_probs = proposal_scores.probs[spec_indices, :-1] + bonus_token_ids = proposal_scores.token_ids[spec_indices, -1:] + non_spec_token_ids = proposal_scores.token_ids[non_spec_indices] + + accepted_token_ids = self.rejection_sampler( + proposal_probs, + bonus_token_ids, + proposals.proposal_probs, + proposals.proposal_token_ids, + ) + + # Append output tokens from non-speculative sequences to + # the accepted token ids tensor. + non_spec_token_ids = non_spec_token_ids.expand(-1, max_proposal_len + + 1).clone() + non_spec_token_ids[:, 1:] = -1 + accepted_token_ids = torch.cat( + [accepted_token_ids, non_spec_token_ids]) + + # Rearrange so that results are in the order of the original seq group + # metadata. + accepted_token_ids[original_indices] = accepted_token_ids.clone() + + return accepted_token_ids + + def _create_output_sampler_list( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + accepted_token_ids: torch.Tensor, # shape: [batch_size, k+1] + k: int, + ) -> List[SamplerOutput]: + """Given the accepted token ids, create a list of SamplerOutput. + + The output is padded with -1 tokens such that each sequence has + the same number of outputs. + """ + seq_ids = get_all_seq_ids(seq_group_metadata_list) + + # shape: [k+1, batch_size] + accepted_token_ids_by_step = accepted_token_ids.transpose(0, + 1).tolist() + sampler_output_list = [] + for token_ids_by_step in accepted_token_ids_by_step: + if all(token_id == -1 for token_id in token_ids_by_step): + break + + step_output_token_ids = [] + for token_id, seq_id in zip(token_ids_by_step, seq_ids): + step_output_token_ids.append( + SequenceGroupOutput( + samples=[ + SequenceOutput( + parent_seq_id=seq_id, + output_token=token_id, + # TODO Add verifier logprobs. + logprobs={token_id: 0.0}, + ) + ], + prompt_logprobs=None, + )) + sampler_output_list.append( + SamplerOutput(outputs=step_output_token_ids)) + + maybe_rejsample_metrics = self._metrics.maybe_collect_rejsample_metrics( + k) + if maybe_rejsample_metrics is not None: + sampler_output_list[ + 0].spec_decode_worker_metrics = maybe_rejsample_metrics + + return sampler_output_list + + @cached_property + def _vocab_size(self) -> int: + """Get the vocab size of the model and make sure it's consistent between + draft and target workers. + """ + vocab_sizes = [ + worker.vocab_size + for worker in [self.proposer_worker, self.scorer_worker] + ] + assert all(vocab_sizes[0] == vocab_size for vocab_size in vocab_sizes) + return vocab_sizes[0] + + @property + def rank(self): + return self.scorer_worker.rank + + @property + def device(self): + return self.scorer_worker.device + + +def split_num_cache_blocks_evenly(scorer_cache_block_size_bytes: int, + proposer_cache_block_size_bytes: int, + total_num_gpu_blocks: int) -> int: + """Given total_num_gpu_blocks, the number of GPU blocks that could be + allocate to the target model, this function calculates how many blocks + should be given to the draft and target model. + + Note that usually the block size, in bytes, of each model is different, + as it's a function of number of KV/layer, number of heads, and hidden + dimension size. 
+ + Since the target and draft models allocate the same number of blocks, we + simply calculate the number of blocks where if allocated by both models, + the total memory usage from KV cache is no larger than the number of + blocks allocatable by the target model alone. + """ + new_num_gpu_blocks = int( + total_num_gpu_blocks * scorer_cache_block_size_bytes / + (proposer_cache_block_size_bytes + scorer_cache_block_size_bytes)) + + return new_num_gpu_blocks diff --git a/vllm/spec_decode/util.py b/vllm/spec_decode/util.py new file mode 100644 index 0000000000000..2c5f954551905 --- /dev/null +++ b/vllm/spec_decode/util.py @@ -0,0 +1,99 @@ +import torch +from typing import List, Tuple +from vllm.sequence import SequenceGroupMetadata, SamplerOutput +from contextlib import contextmanager +from itertools import chain + +SeqId = int + + +def get_all_seq_ids( + seq_group_metadata_list: List[SequenceGroupMetadata]) -> List[SeqId]: + """Given a list of SequenceGroupMetadata, create a list of all + sequence ids. + """ + return list( + chain.from_iterable([ + seq_group_metadata.seq_data.keys() + for seq_group_metadata in seq_group_metadata_list + ])) + + +def split_batch_by_proposal_len( + seq_group_metadata_list: List[SequenceGroupMetadata], + proposal_lens: List[int], select_proposal_len_zero: bool +) -> Tuple[List[SequenceGroupMetadata], List[int]]: + """Utility function that splits a batch based on whether the proposal len is + zero or not. We should remove this once vLLM supports per-sequence proposal + lens in a batch. + """ + + if select_proposal_len_zero: + predicate = lambda proposal_len: proposal_len == 0 + else: + predicate = lambda proposal_len: proposal_len != 0 + + indices = [ + i for i, (_, proposal_len + ) in enumerate(zip(seq_group_metadata_list, proposal_lens)) + if predicate(proposal_len) + ] + seq_groups = [ + seq_group for seq_group, proposal_len in zip( + seq_group_metadata_list, proposal_lens) if predicate(proposal_len) + ] + + return seq_groups, indices + + +def sampler_output_to_torch( + sampler_output_list: List[SamplerOutput], +) -> Tuple[torch.Tensor, torch.Tensor]: + """Utility function which converts a list of SamplerOutput to tensors. + + Returns: + sampled_token_ids: torch.Tensor + shape: [batch_size, len(sampler_output_list)] + + sampled_token_probs: torch.Tensor + shape: [batch_size, len(sampler_output_list), vocab_size] + """ + + # shape: [batch_size, num_sampler_output, vocab_size] + sampled_token_probs = torch.stack( + [ + sampler_output.sampled_token_probs + for sampler_output in sampler_output_list + ], + dim=0, + ).transpose(0, 1) + + # shape: [batch_size, num_sampler_output] + sampled_token_ids = torch.stack( + [ + sampler_output.sampled_token_ids.flatten() + for sampler_output in sampler_output_list + ], + dim=0, + ).transpose(0, 1) + + return sampled_token_ids, sampled_token_probs + + +@contextmanager +def nvtx_range(msg, *args, **kwargs): + """ + Context manager / decorator that pushes an NVTX range at the beginning + of its scope, and pops it at the end. If extra arguments are given, + they are passed as arguments to msg.format(). + + If running with cuda graphs, you must enable nsys cuda graph profiling. 
+ + Arguments: + msg (string): message to associate with the range + """ + torch.cuda.nvtx.range_push(msg.format(*args, **kwargs)) + try: + yield + finally: + torch.cuda.nvtx.range_pop() diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 9023b0c59b3fb..0dd2309079403 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -97,8 +97,6 @@ def load_model(self) -> None: f"Loading model weights took {self.model_memory_usage / float(2**30):.4f} GB" ) - vocab_size = self.model.config.vocab_size - if self.lora_config: assert hasattr( self.model, "supported_lora_modules" @@ -111,7 +109,7 @@ def load_model(self) -> None: self.lora_manager = LRUCacheWorkerLoRAManager( self.scheduler_config.max_num_seqs, self.scheduler_config.max_num_batched_tokens + - self.scheduler_config.max_paddings, vocab_size, + self.scheduler_config.max_paddings, self.vocab_size, self.lora_config, self.device, self.model.embedding_modules, self.model.embedding_padding_modules) self.model = self.lora_manager.create_lora_manager(self.model) @@ -607,8 +605,7 @@ def execute_model( @torch.inference_mode() def profile_run(self) -> None: # Enable top-k sampling to reflect the accurate memory usage. - vocab_size = self.model_config.get_vocab_size() - sampling_params = SamplingParams(top_p=0.99, top_k=vocab_size - 1) + sampling_params = SamplingParams(top_p=0.99, top_k=self.vocab_size - 1) max_num_batched_tokens = self.scheduler_config.max_num_batched_tokens max_num_seqs = self.scheduler_config.max_num_seqs @@ -774,6 +771,10 @@ def __del__(self) -> None: self.graph_runners.clear() self.cupy_nccl_backend = None + @property + def vocab_size(self) -> int: + return self.model_config.get_vocab_size() + class CUDAGraphRunner: diff --git a/vllm/worker/spec_decode/multi_step_worker.py b/vllm/worker/spec_decode/multi_step_worker.py deleted file mode 100644 index ab3e28389a04c..0000000000000 --- a/vllm/worker/spec_decode/multi_step_worker.py +++ /dev/null @@ -1,178 +0,0 @@ -from typing import List, Dict -import copy - -import torch - -from vllm.sequence import SamplerOutput, SequenceGroupMetadata -from vllm.worker.worker import Worker - - -class MultiStepWorker(Worker): - """The MultiStepWorker is equivalent to a Worker except that it allows - multiple forward passes in a single call, assuming the scheduler has - allocated enough space to store the additional KV. This reduces overhead - by invoking the scheduler less. - - The MultiStepWorker does not support cache swap operations, or beam search. - Cache swap operations do not require large modifications. On the other hand, - beam search requires memory allocations during sequence forks and thus - requires more thought for MultiStepWorker support. - """ - - @torch.inference_mode() - def execute_model_multi_step( - self, - seq_group_metadata_list: List[SequenceGroupMetadata], - blocks_to_swap_in: Dict[int, int], - blocks_to_swap_out: Dict[int, int], - blocks_to_copy: Dict[int, List[int]], - num_steps: int, - ) -> List[SamplerOutput]: - """Run the model forward pass num_steps times. Returns the list of - sampler output, one per model forward pass. - """ - self._raise_if_unsupported(seq_group_metadata_list, blocks_to_swap_in, - blocks_to_swap_out, blocks_to_copy) - - # Shallow copy input data so modifications (such as appending tokens) - # do not cause side-effects. - copied_seq_group_metadata_list = self._shallow_copy_inputs( - seq_group_metadata_list) - - # Assert enough KV space for num_steps tokens per sequence. 
- self._assert_enough_kv_space(seq_group_metadata_list, num_steps) - - # Run model num_steps times. - model_outputs = [] - for _ in range(num_steps): - model_output = super().execute_model( - seq_group_metadata_list=copied_seq_group_metadata_list, - blocks_to_swap_in=blocks_to_swap_in, - blocks_to_swap_out=blocks_to_swap_out, - blocks_to_copy=blocks_to_copy, - ) - - self._append_new_tokens(model_output, - copied_seq_group_metadata_list) - model_outputs.append(model_output) - - return model_outputs - - def _append_new_tokens( - self, model_output: SamplerOutput, - seq_group_metadata_list: SequenceGroupMetadata) -> None: - """Given model output from a single run, append the tokens to the - sequences. This is normally done outside of the worker, but it is - required if the worker is to perform multiple forward passes. - """ - for seq_group_metadata, sequence_group_outputs in zip( - seq_group_metadata_list, model_output): - seq_group_metadata.is_prompt = False - - for seq_output in sequence_group_outputs.samples: - # NOTE: Beam search is not supported, so we can assume that - # parent_seq_id == seq_id. - seq = seq_group_metadata.seq_data[seq_output.parent_seq_id] - - token_id = seq_output.output_token - token_logprob = seq_output.logprobs[token_id] - - seq.append_token_id(token_id, token_logprob.logprob) - - def _shallow_copy_inputs( - self, seq_group_metadata_list: List[SequenceGroupMetadata] - ) -> List[SequenceGroupMetadata]: - """Copy input data structures to remove side-effects when input data - structures are shared with other modules. - - The multi-step worker must be able to append tokens to sequences after - a forward pass. This necessitates modification of the data structures - used by the worker. Since these data structures are shared with other - parts of vLLM, like the scheduler, we must take care not to introduce - unexpected side-effects. - - When Ray is used to orchestrate worker processes (such as when the - tensor-parallel degree is >1), this is not a problem because the input - datastructures will be serialized and created anew in the worker - process. - - However, when Ray is not used to orchestrate the worker processes (such - as when the tensor-parallel degree is 1), this is a problem. We avoid - the problem by shallow-copying the input datastructures (specifically, - the parts that will change in multiple steps). - """ - - # Shallow-copy the list of SequenceGroupMetadata. This allows us to - # append tokens and change is_prompt without external side-effects. - new_seq_group_metadata_list = [] - - for old_seq_group_metadata in seq_group_metadata_list: - # We must shallow-copy seq_group_metadata as is_prompt could change. - seq_group_metadata = copy.copy(old_seq_group_metadata) - new_seq_group_metadata_list.append(seq_group_metadata) - - # We must shallow-copy seq_data as we will append token ids - new_seq_data = {} - for seq_id, old_seq_data in seq_group_metadata.seq_data.items(): - new_seq_data[seq_id] = copy.copy(old_seq_data) - new_seq_data[ - seq_id].output_token_ids = old_seq_data.output_token_ids[:] - - seq_group_metadata.seq_data = new_seq_data - - return new_seq_group_metadata_list - - def _assert_enough_kv_space( - self, seq_group_metadata_list: List[SequenceGroupMetadata], - num_steps: int) -> None: - """Assert there are enough physical blocks per sequence to store the - current KV plus additional KV from num_steps tokens. 
- """ - assert self.model_runner.block_size is not None - for seq_group_metadata in seq_group_metadata_list: - # Only one seq_id is guaranteed because there is no beam search. - seq_id = list(seq_group_metadata.seq_data.keys())[0] - seq = seq_group_metadata.seq_data[seq_id] - - # After num_steps, the seq len will be the current seq len - # plus one token per step. - final_seq_len = seq.get_len() + num_steps - - # We will have final_seq_len - 1 KV because vLLM saves KV for a - # token in the iteration after the token was generated. - required_num_kv_slots = final_seq_len - 1 - - # The allocated number of kv slots is the number of allocated blocks - # times the number of slots of block. - number_physical_blocks = len( - seq_group_metadata.block_tables[seq_id]) - allocated_kv_slots = (number_physical_blocks * - self.model_runner.block_size) - - if required_num_kv_slots > allocated_kv_slots: - request_id = seq_group_metadata.request_id - raise ValueError( - "The worker attempted to run " - f"{num_steps} times but found insufficient KV space for " - f"{request_id=} {seq_id=}. ({allocated_kv_slots=} " - f"{required_num_kv_slots=}).") - - def _raise_if_unsupported( - self, - seq_group_metadata_list: List[SequenceGroupMetadata], - blocks_to_swap_in: Dict[int, int], - blocks_to_swap_out: Dict[int, int], - blocks_to_copy: Dict[int, List[int]], - ) -> None: - """MultiStepWorker does not yet implement support for cache swap - operations or beam search. - """ - if any([blocks_to_swap_in, blocks_to_swap_out, blocks_to_copy]): - raise NotImplementedError( - "MultiStepWorker does not support cache operations") - - if any( - len(seq_group_metadata.seq_data.keys()) != 1 - for seq_group_metadata in seq_group_metadata_list): - raise NotImplementedError( - "MultiStepWorker does not support beam search.") diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 157e8c45836b1..0dcd4018afa5f 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -130,8 +130,8 @@ def profile_num_available_blocks( # GPU did not change their memory usage during the profiling. peak_memory = self.init_gpu_memory - free_gpu_memory - cache_block_size = CacheEngine.get_cache_block_size( - block_size, cache_dtype, self.model_config, self.parallel_config) + cache_block_size = self.get_cache_block_size_bytes( + block_size, cache_dtype) num_gpu_blocks = int( (total_gpu_memory * gpu_memory_utilization - peak_memory) // cache_block_size) @@ -232,6 +232,22 @@ def remove_lora(self, lora_id: int) -> bool: def list_loras(self) -> Set[int]: return self.model_runner.list_loras() + @property + def max_model_len(self) -> int: + return self.model_config.max_model_len + + @property + def vocab_size(self) -> int: + return self.model_runner.vocab_size + + def get_cache_block_size_bytes(self, block_size: int, + cache_dtype: str) -> int: + """Get the size of the KV cache block size in bytes. 
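[Illustration, not part of the patch series] The KV-space check above counts allocated slots as blocks-times-block-size, and the surrounding worker.py hunk routes block sizing through CacheEngine.get_cache_block_size. A rough back-of-the-envelope sketch of that arithmetic; the model dimensions are invented, and the real computation also depends on the cache dtype and layout.

def kv_cache_block_bytes(block_size: int, num_layers: int, num_kv_heads: int,
                         head_size: int, dtype_bytes: int = 2) -> int:
    # One block stores key and value vectors for block_size tokens across
    # every layer: 2 (K and V) * layers * heads * head_size * element bytes.
    return 2 * block_size * num_layers * num_kv_heads * head_size * dtype_bytes


def blocks_needed(seq_len: int, extra_steps: int, block_size: int) -> int:
    # KV for a token is written in the iteration after it is generated,
    # hence the "- 1", mirroring the final_seq_len - 1 accounting above.
    required_slots = seq_len + extra_steps - 1
    return -(-required_slots // block_size)  # ceiling division


# Invented 7B-ish dimensions: 16-token blocks, fp16 cache entries.
print(kv_cache_block_bytes(block_size=16, num_layers=32,
                           num_kv_heads=32, head_size=128))  # 8388608 (~8 MiB)
print(blocks_needed(seq_len=100, extra_steps=4, block_size=16))  # 7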
+ """ + return CacheEngine.get_cache_block_size(block_size, cache_dtype, + self.model_config, + self.parallel_config) + def init_distributed_environment( parallel_config: ParallelConfig, From 0bba88df03754c40bd9135fc2ff9554ffca59c87 Mon Sep 17 00:00:00 2001 From: Terry <149540247+tterrysun@users.noreply.github.com> Date: Sat, 9 Mar 2024 17:14:16 -0800 Subject: [PATCH 076/196] Enhance lora tests with more layer and rank variations (#3243) --- csrc/punica/bgmv/bgmv_config.h | 1 + requirements-dev.txt | 1 + tests/lora/test_layer_variation.py | 104 +++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+) create mode 100644 tests/lora/test_layer_variation.py diff --git a/csrc/punica/bgmv/bgmv_config.h b/csrc/punica/bgmv/bgmv_config.h index 3eb84ceb4d534..4dc90de1ab42a 100644 --- a/csrc/punica/bgmv/bgmv_config.h +++ b/csrc/punica/bgmv/bgmv_config.h @@ -14,6 +14,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 128) \ f(in_T, out_T, W_T, narrow, 256) \ f(in_T, out_T, W_T, narrow, 512) \ + f(in_T, out_T, W_T, narrow, 768) \ f(in_T, out_T, W_T, narrow, 1024) \ f(in_T, out_T, W_T, narrow, 1280) \ f(in_T, out_T, W_T, narrow, 1728) \ diff --git a/requirements-dev.txt b/requirements-dev.txt index dfcbfa4253f1c..5502c97d014ac 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -21,6 +21,7 @@ einops # required for MPT openai requests ray +peft # Benchmarking aiohttp diff --git a/tests/lora/test_layer_variation.py b/tests/lora/test_layer_variation.py new file mode 100644 index 0000000000000..1a1da517b2276 --- /dev/null +++ b/tests/lora/test_layer_variation.py @@ -0,0 +1,104 @@ +from typing import List, Optional +import peft +import pytest +from random import sample +import tempfile +from transformers import AutoModelForCausalLM + +import vllm +from vllm.lora.request import LoRARequest +from .conftest import cleanup + +MODEL_PATH = "Felladrin/Llama-68M-Chat-v1" +PROMPTS = [ + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. 
This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]", +] + + +def get_lora_model(model_id: str, target_modules: List[str], rank: int): + model = AutoModelForCausalLM.from_pretrained(model_id) + lora_config = peft.tuners.lora.LoraConfig(target_modules, rank) + lora_model = peft.PeftModel(model, lora_config) + return lora_model + + +def do_sample(llm, + lora_path: Optional[str] = None, + lora_id: Optional[int] = None, + logprobs: int = 0, + n_tokens: int = 256): + prompts = PROMPTS + sampling_params = vllm.SamplingParams(temperature=0, + max_tokens=n_tokens, + logprobs=logprobs, + stop=["[/assistant]"]) + outputs = llm.generate( + prompts, + sampling_params, + lora_request=LoRARequest(str(lora_id), lora_id, lora_path) + if lora_id else None) + # Print the outputs. 
+ generated_texts = [] + generated_logprobs = [] + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + generated_texts.append(generated_text) + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + generated_logprobs.append([ + list(logprob.keys()) for out in output.outputs + for logprob in out.logprobs + ]) + return generated_logprobs if logprobs else generated_texts + + +SUPPORTED_MODULES = [ + "qkv_proj", "o_proj", "gate_up_proj", "down_proj", "embed_tokens", + "lm_head" +] +TARGET_MODULES_LIST = [] +for length in range(2, 6): + TARGET_MODULES_LIST.extend( + [sample(SUPPORTED_MODULES, length) for _ in range(3)]) + + +# Test the correctness when layer and rank are varied +# step 1: init a base model and serve with LoRA to get the reference results +# step 2: merge the same LoRA to the base model, serve the merged model +# step 3: compare the results from step 1 and step 2 +@pytest.mark.parametrize("tp_size", [1]) +@pytest.mark.parametrize("target_modules", TARGET_MODULES_LIST) +@pytest.mark.parametrize("rank", [8, 16, 32, 64]) +def test_layer_variation_correctness(tp_size, target_modules, rank): + llm = vllm.LLM(MODEL_PATH, + enable_lora=True, + max_num_seqs=16, + max_loras=4, + tensor_parallel_size=tp_size, + worker_use_ray=True) + model = get_lora_model(MODEL_PATH, target_modules, rank) + with tempfile.TemporaryDirectory() as tmpdir: + model.save_pretrained(tmpdir) + merged_probs = do_sample(llm, tmpdir, 1, logprobs=5, n_tokens=32) + del llm + cleanup() + reference_id_sets = [set(prob[0]) for prob in merged_probs] + + model = get_lora_model(MODEL_PATH, target_modules, rank) + with tempfile.TemporaryDirectory() as tmpdir: + merged_model = model.merge_and_unload() + merged_model.save_pretrained(tmpdir) + llm = vllm.LLM(tmpdir, + tokenizer=MODEL_PATH, + enable_lora=False, + max_num_seqs=16, + tensor_parallel_size=tp_size, + worker_use_ray=True) + probs = do_sample(llm, logprobs=5, n_tokens=32) + del llm + cleanup() + # verify the top-5 tokens are identical for each token + id_sets = [set(prob[0]) for prob in probs] + assert id_sets == reference_id_sets From e4a28e53165902ffc5daf20977c70885d0c05768 Mon Sep 17 00:00:00 2001 From: Douglas Lehr <91553416+dllehr-amd@users.noreply.github.com> Date: Sun, 10 Mar 2024 17:27:45 -0500 Subject: [PATCH 077/196] [ROCM] Fix blockReduceSum to use correct warp counts for ROCm and CUDA (#3262) --- csrc/attention/attention_kernels.cu | 8 -------- csrc/cuda_compat.h | 10 ++++++++++ csrc/reduction_utils.cuh | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/csrc/attention/attention_kernels.cu b/csrc/attention/attention_kernels.cu index b5be3befa07e2..5e61668d5cc1a 100644 --- a/csrc/attention/attention_kernels.cu +++ b/csrc/attention/attention_kernels.cu @@ -15,9 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef USE_ROCM -#include -#endif #include #include @@ -31,11 +28,6 @@ #include -#ifndef USE_ROCM -#define WARP_SIZE 32 -#else -#define WARP_SIZE warpSize -#endif #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? 
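[Illustration, not part of the patch series] The ROCm commit starting above replaces hard-coded 32s in the reduction helpers with WARP_SIZE, since AMD GPUs typically expose 64-wide wavefronts. A plain-Python model of the two-stage block reduction those helpers implement (pure arithmetic, no GPU), showing how the number of per-warp partials depends on the warp width:

import math
import random


def block_reduce_sum(thread_vals, warp_size):
    # Stage 1: each warp reduces its own lane values; the per-warp partial
    # sums are what the CUDA code parks in the __shared__ array.
    num_warps = math.ceil(len(thread_vals) / warp_size)
    partials = [
        sum(thread_vals[w * warp_size:(w + 1) * warp_size])
        for w in range(num_warps)
    ]
    # Stage 2: the first warp reduces the partials to the block-wide sum.
    return sum(partials), num_warps


random.seed(0)
vals = [random.random() for _ in range(1024)]   # one value per thread
for warp_size in (32, 64):                      # NVIDIA vs. typical ROCm width
    total, num_warps = block_reduce_sum(vals, warp_size)
    print(warp_size, num_warps, round(total, 6))
    assert math.isclose(total, sum(vals), rel_tol=1e-9)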
(a) : (b)) #define DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) diff --git a/csrc/cuda_compat.h b/csrc/cuda_compat.h index aa58dd73c148a..c711d8d1b24b9 100644 --- a/csrc/cuda_compat.h +++ b/csrc/cuda_compat.h @@ -1,5 +1,15 @@ #pragma once +#ifdef USE_ROCM +#include +#endif + +#ifndef USE_ROCM + #define WARP_SIZE 32 +#else + #define WARP_SIZE warpSize +#endif + #ifndef USE_ROCM #define VLLM_LDG(arg) __ldg(arg) #else diff --git a/csrc/reduction_utils.cuh b/csrc/reduction_utils.cuh index b95ccef162072..210bf0b023ab2 100644 --- a/csrc/reduction_utils.cuh +++ b/csrc/reduction_utils.cuh @@ -24,7 +24,7 @@ namespace vllm { template __inline__ __device__ T warpReduceSum(T val) { #pragma unroll - for (int mask = 16; mask > 0; mask >>= 1) + for (int mask = WARP_SIZE/2; mask > 0; mask >>= 1) val += VLLM_SHFL_XOR_SYNC(val, mask); return val; } @@ -32,7 +32,7 @@ __inline__ __device__ T warpReduceSum(T val) { /* Calculate the sum of all elements in a block */ template __inline__ __device__ T blockReduceSum(T val) { - static __shared__ T shared[32]; + static __shared__ T shared[WARP_SIZE]; int lane = threadIdx.x & 0x1f; int wid = threadIdx.x >> 5; @@ -45,7 +45,7 @@ __inline__ __device__ T blockReduceSum(T val) { // Modify from blockDim.x << 5 to blockDim.x / 32. to prevent // blockDim.x is not divided by 32 - val = (threadIdx.x < (blockDim.x / 32.f)) ? shared[lane] : (T)(0.0f); + val = (threadIdx.x < (blockDim.x / (WARP_SIZE * 1.0f))) ? shared[lane] : (T)(0.0f); val = warpReduceSum(val); return val; } From 9e8744a545f34ca636a5113ae98cec5851af56eb Mon Sep 17 00:00:00 2001 From: Roy Date: Mon, 11 Mar 2024 10:17:16 +0800 Subject: [PATCH 078/196] [BugFix] Fix get tokenizer when using ray (#3301) --- tests/async_engine/test_async_llm_engine.py | 3 +++ vllm/engine/async_llm_engine.py | 9 +++++++-- vllm/engine/llm_engine.py | 8 +++++++- vllm/entrypoints/openai/serving_chat.py | 2 +- vllm/entrypoints/openai/serving_completion.py | 2 +- vllm/transformers_utils/tokenizer.py | 6 ++++-- 6 files changed, 23 insertions(+), 7 deletions(-) diff --git a/tests/async_engine/test_async_llm_engine.py b/tests/async_engine/test_async_llm_engine.py index 1e31ff7373031..cb125a7bfec30 100644 --- a/tests/async_engine/test_async_llm_engine.py +++ b/tests/async_engine/test_async_llm_engine.py @@ -89,3 +89,6 @@ async def test_new_requests_event(): await asyncio.sleep(0.01) assert engine.engine.add_request_calls == 3 assert engine.engine.step_calls == old_step_calls + 1 + + engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True) + assert engine.get_tokenizer() is not None diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 65ab0c0634176..5629d1a863d04 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -5,6 +5,8 @@ from typing import (Any, Dict, Iterable, List, Optional, Set, Tuple, Type, Union, AsyncIterator, Callable) +from transformers import PreTrainedTokenizer + from vllm.lora.request import LoRARequest from vllm.config import ModelConfig from vllm.engine.arg_utils import AsyncEngineArgs @@ -372,8 +374,11 @@ def _error_callback(self, exc: Exception) -> None: self.set_errored(exc) self._request_tracker.propagate_exception(exc) - def get_tokenizer(self): - return self.engine.tokenizer.tokenizer + async def get_tokenizer(self) -> "PreTrainedTokenizer": + if self.engine_use_ray: + return await self.engine.get_tokenizer.remote() + else: + return self.engine.get_tokenizer() def start_background_loop(self) -> None: """Start the background loop.""" diff 
--git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 8484014c9a13f..5b46d9db5649a 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -7,6 +7,8 @@ from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union) +from transformers import PreTrainedTokenizer + import vllm from vllm.lora.request import LoRARequest from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, @@ -163,7 +165,11 @@ def __reduce__(self): # the closure used to initialize Ray worker actors raise RuntimeError("LLMEngine should not be pickled!") - def get_tokenizer_for_seq(self, sequence: Sequence): + def get_tokenizer(self) -> "PreTrainedTokenizer": + return self.tokenizer.get_lora_tokenizer() + + def get_tokenizer_for_seq(self, + sequence: Sequence) -> "PreTrainedTokenizer": return self.tokenizer.get_lora_tokenizer(sequence.lora_request) def _dispatch_worker(self): diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index ba352f18f6454..7d5603c85e4e9 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -65,7 +65,7 @@ async def create_chat_completion( lora_request = self._maybe_get_lora(request) guided_decode_logits_processor = ( await get_guided_decoding_logits_processor( - request, self.engine.get_tokenizer())) + request, await self.engine.get_tokenizer())) if guided_decode_logits_processor: if sampling_params.logits_processors is None: sampling_params.logits_processors = [] diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index a8244fd150753..c673b2582c47b 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -126,7 +126,7 @@ async def create_completion(self, request: CompletionRequest, lora_request = self._maybe_get_lora(request) guided_decode_logit_processor = ( await get_guided_decoding_logits_processor( - request, self.engine.get_tokenizer())) + request, await self.engine.get_tokenizer())) if guided_decode_logit_processor is not None: if sampling_params.logits_processors is None: sampling_params.logits_processors = [] diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 6edc225cdfc80..2600ea2642da2 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -120,7 +120,8 @@ async def encode_async( def get_lora_tokenizer( self, - lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": if not lora_request or not self.enable_lora: return self.tokenizer if lora_request.lora_int_id not in self.lora_tokenizers: @@ -133,7 +134,8 @@ def get_lora_tokenizer( async def get_lora_tokenizer_async( self, - lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": if not lora_request or not self.enable_lora: return self.tokenizer if lora_request.lora_int_id not in self.lora_tokenizers: From 4b59f00e917679337169c88c981f268e6ab96cd6 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Sun, 10 Mar 2024 19:17:46 -0700 Subject: [PATCH 079/196] [Fix] Fix best_of behavior when n=1 (#3298) --- vllm/outputs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/outputs.py b/vllm/outputs.py index 4f9eddee11cd4..b8173fd7a0638 100644 --- a/vllm/outputs.py +++ b/vllm/outputs.py @@ -87,12 +87,12 @@ def __init__( 
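[Illustration, not part of the patch series] Patch 078 above turns get_tokenizer into an async call so the OpenAI frontend can await a Ray remote invocation when the engine lives in a separate actor, or fall back to a direct call otherwise. A minimal asyncio-only sketch of that dual path; Ray is stubbed out with a plain coroutine, and the class names are placeholders.

import asyncio


class RemoteEngineStub:
    # Stands in for a Ray actor handle; the real call in the patch is
    # "await self.engine.get_tokenizer.remote()".
    async def get_tokenizer(self) -> str:
        await asyncio.sleep(0)  # pretend this crosses a process boundary
        return "tokenizer-from-remote-engine"


class LocalEngineStub:
    def get_tokenizer(self) -> str:
        return "tokenizer-from-local-engine"


class AsyncFrontend:
    def __init__(self, engine, engine_use_ray: bool):
        self.engine = engine
        self.engine_use_ray = engine_use_ray

    async def get_tokenizer(self) -> str:
        # Await the remote call when the engine is an actor, otherwise call
        # it directly; callers await either way.
        if self.engine_use_ray:
            return await self.engine.get_tokenizer()
        return self.engine.get_tokenizer()


async def main() -> None:
    print(await AsyncFrontend(RemoteEngineStub(), True).get_tokenizer())
    print(await AsyncFrontend(LocalEngineStub(), False).get_tokenizer())


asyncio.run(main())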
@classmethod def from_seq_group(cls, seq_group: SequenceGroup) -> "RequestOutput": - # Get the top-n sequences. - n = seq_group.sampling_params.n seqs = seq_group.get_seqs() - if n == 1: + if len(seqs) == 1: top_n_seqs = seqs else: + # Get the top-n sequences. + n = seq_group.sampling_params.n if seq_group.sampling_params.use_beam_search: sorting_key = lambda seq: seq.get_beam_search_score( seq_group.sampling_params.length_penalty) From 2f8844ba08d77af8a64784317055b03a475f6051 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Sun, 10 Mar 2024 19:49:14 -0700 Subject: [PATCH 080/196] Re-enable the 80 char line width limit (#3305) --- pyproject.toml | 6 +- setup.py | 4 +- tests/async_engine/test_chat_template.py | 6 +- tests/core/test_block_manager.py | 3 +- tests/entrypoints/test_guided_processors.py | 4 +- tests/entrypoints/test_openai_server.py | 36 +++--- tests/kernels/test_moe.py | 3 +- tests/kernels/test_prefix_prefill.py | 3 +- tests/lora/test_layer_variation.py | 6 +- tests/lora/test_layers.py | 15 ++- tests/lora/test_llama.py | 47 ++++---- tests/lora/test_mixtral.py | 12 +- tests/metrics/test_metrics.py | 14 ++- tests/models/test_marlin.py | 15 +-- tests/prefix_caching/test_prefix_caching.py | 15 ++- tests/samplers/test_logprobs.py | 4 +- tests/samplers/test_sampler.py | 17 +-- tests/spec_decode/test_metrics.py | 6 +- tests/spec_decode/test_multi_step_worker.py | 3 +- tests/spec_decode/test_spec_decode_worker.py | 18 ++- vllm/config.py | 14 ++- vllm/core/block_manager.py | 15 ++- vllm/core/evictor.py | 6 +- vllm/core/scheduler.py | 8 +- vllm/engine/llm_engine.py | 27 +++-- vllm/engine/metrics.py | 22 ++-- vllm/entrypoints/api_server.py | 8 +- vllm/entrypoints/openai/api_server.py | 33 +++--- vllm/entrypoints/openai/serving_chat.py | 25 ++-- vllm/entrypoints/openai/serving_completion.py | 28 +++-- vllm/entrypoints/openai/serving_engine.py | 13 ++- vllm/lora/layers.py | 14 ++- vllm/lora/models.py | 3 +- vllm/lora/worker_manager.py | 7 +- vllm/model_executor/guided_decoding.py | 6 +- .../guided_logits_processors.py | 15 ++- .../layers/attention/attention.py | 4 +- .../layers/fused_moe/fused_moe.py | 107 ++++++++++++------ vllm/model_executor/layers/linear.py | 12 +- .../layers/quantization/__init__.py | 3 +- .../model_executor/layers/quantization/awq.py | 6 +- .../layers/quantization/gptq.py | 10 +- .../layers/quantization/marlin.py | 39 ++++--- .../layers/quantization/squeezellm.py | 3 +- vllm/model_executor/layers/sampler.py | 3 +- vllm/model_executor/models/baichuan.py | 3 +- vllm/model_executor/models/deepseek.py | 8 +- vllm/model_executor/models/gpt_j.py | 3 +- vllm/model_executor/models/internlm2.py | 3 +- vllm/model_executor/models/olmo.py | 19 ++-- vllm/model_executor/models/qwen2.py | 3 +- vllm/model_executor/models/stablelm.py | 13 ++- vllm/model_executor/models/starcoder2.py | 3 +- vllm/model_executor/neuron_model_loader.py | 3 +- .../parallel_utils/communication_op.py | 5 +- vllm/model_executor/sampling_metadata.py | 3 +- vllm/sampling_params.py | 4 +- vllm/sequence.py | 3 +- vllm/spec_decode/batch_expansion.py | 29 +++-- vllm/spec_decode/multi_step_worker.py | 14 ++- vllm/spec_decode/spec_decode_worker.py | 19 ++-- vllm/transformers_utils/configs/mpt.py | 89 +++------------ vllm/transformers_utils/configs/starcoder2.py | 72 ------------ .../transformers_utils/tokenizers/baichuan.py | 92 +++++++-------- vllm/utils.py | 12 +- vllm/worker/model_runner.py | 11 +- vllm/worker/neuron_worker.py | 6 +- 67 files changed, 557 insertions(+), 528 deletions(-) diff --git a/pyproject.toml 
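[Illustration, not part of the patch series] The outputs.py change above makes from_seq_group take the no-sort shortcut only when the group actually contains a single sequence, so a request with best_of > 1 and n == 1 still gets the best candidate back. A simplified sketch of that selection logic with a made-up Seq record; the real code also uses a beam-search scoring key when applicable.

from dataclasses import dataclass
from typing import List


@dataclass
class Seq:
    text: str
    cumulative_logprob: float


def top_n_sequences(seqs: List[Seq], n: int) -> List[Seq]:
    # Skip the sort only when there is nothing to choose between; with
    # best_of > 1 there are several candidates even if n == 1.
    if len(seqs) == 1:
        return seqs
    ranked = sorted(seqs, key=lambda s: s.cumulative_logprob, reverse=True)
    return ranked[:n]


candidates = [Seq("a", -1.2), Seq("b", -0.4), Seq("c", -2.0)]  # best_of = 3
print([s.text for s in top_n_sequences(candidates, n=1)])      # ['b']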
b/pyproject.toml index c5db016cebdb7..d6fa5d7a035ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,10 @@ requires = [ ] build-backend = "setuptools.build_meta" +[tool.ruff] +# Allow lines to be as long as 80. +line-length = 80 + [tool.ruff.lint] select = [ # pycodestyle @@ -29,8 +33,6 @@ ignore = [ "F405", "F403", # lambda expression assignment "E731", - # line too long, handled by black formatting - "E501", # .strip() with multi-character strings "B005", # Loop control variable not used within loop body diff --git a/setup.py b/setup.py index 745b5a9b2d02a..023c3cde1910c 100644 --- a/setup.py +++ b/setup.py @@ -142,8 +142,8 @@ def get_pytorch_rocm_arch() -> Set[str]: # If we don't have PYTORCH_ROCM_ARCH specified pull the list from rocm_agent_enumerator if env_arch_list is None: command = "rocm_agent_enumerator" - env_arch_list = subprocess.check_output([command]).decode('utf-8')\ - .strip().replace("\n", ";") + env_arch_list = (subprocess.check_output( + [command]).decode('utf-8').strip().replace("\n", ";")) arch_source_str = "rocm_agent_enumerator" else: arch_source_str = "PYTORCH_ROCM_ARCH env variable" diff --git a/tests/async_engine/test_chat_template.py b/tests/async_engine/test_chat_template.py index 32d110e0f0b47..e98bba8d43b49 100644 --- a/tests/async_engine/test_chat_template.py +++ b/tests/async_engine/test_chat_template.py @@ -73,7 +73,7 @@ def test_load_chat_template(): assert template_content is not None # Hard coded value for template_chatml.jinja assert template_content == """{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %} -{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" +{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}""" # noqa: E501 def test_no_load_chat_template(): @@ -117,4 +117,6 @@ async def test_get_gen_prompt(model, template, add_generation_prompt, add_generation_prompt=mock_request.add_generation_prompt) # Test assertion - assert result == expected_output, f"The generated prompt does not match the expected output for model {model} and template {template}" + assert result == expected_output, ( + f"The generated prompt does not match the expected output for " + f"model {model} and template {template}") diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py index 04d01f7724e4f..b280fd1d73c2f 100644 --- a/tests/core/test_block_manager.py +++ b/tests/core/test_block_manager.py @@ -4,7 +4,8 @@ from vllm import SamplingParams from vllm.block import PhysicalTokenBlock -from vllm.core.block_manager import BlockAllocator, BlockSpaceManager, AllocStatus +from vllm.core.block_manager import (BlockAllocator, BlockSpaceManager, + AllocStatus) from vllm.utils import Device from vllm.sequence import Sequence, SequenceGroup, SequenceStatus, Logprob diff --git a/tests/entrypoints/test_guided_processors.py b/tests/entrypoints/test_guided_processors.py index 5b39269916f8b..4a0e3e759e25a 100644 --- a/tests/entrypoints/test_guided_processors.py +++ b/tests/entrypoints/test_guided_processors.py @@ -46,8 +46,8 @@ "required": ["name", "age", "skills", "work history"] } -TEST_REGEX = r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + \ - r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)" +TEST_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)") def 
test_guided_logits_processors(): diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index f4a6e44d88a87..a5b2bf4c0f0c9 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -5,9 +5,12 @@ import sys import pytest import requests -import ray # using Ray for overall ease of process management, parallel requests, and debugging. +# using Ray for overall ease of process management, parallel requests, +# and debugging. +import ray import openai # use the official client for correctness check -from huggingface_hub import snapshot_download # downloading lora to test lora requests +# downloading lora to test lora requests +from huggingface_hub import snapshot_download # imports for guided decoding tests import json @@ -17,8 +20,11 @@ from vllm.transformers_utils.tokenizer import get_tokenizer MAX_SERVER_START_WAIT_S = 600 # wait for server to start for 60 seconds -MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" # any model with a chat template should work here -LORA_NAME = "typeof/zephyr-7b-beta-lora" # technically this needs Mistral-7B-v0.1 as base, but we're not testing generation quality here +# any model with a chat template should work here +MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" +# technically this needs Mistral-7B-v0.1 as base, but we're not testing +# generation quality here +LORA_NAME = "typeof/zephyr-7b-beta-lora" TEST_SCHEMA = { "type": "object", @@ -59,8 +65,8 @@ "required": ["name", "age", "skills", "work history"] } -TEST_REGEX = r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + \ - r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)" +TEST_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" + r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)") TEST_CHOICE = [ "Python", "Java", "JavaScript", "C++", "C#", "PHP", "TypeScript", "Ruby", @@ -120,8 +126,9 @@ def server(zephyr_lora_files): server_runner = ServerRunner.remote([ "--model", MODEL_NAME, + # use half precision for speed and memory savings in CI environment "--dtype", - "bfloat16", # use half precision for speed and memory savings in CI environment + "bfloat16", "--max-model-len", "8192", "--enforce-eager", @@ -392,7 +399,8 @@ async def test_batch_completions(server, client: openai.AsyncOpenAI, max_tokens=5, temperature=0.0, extra_body=dict( - # NOTE: this has to be true for n > 1 in vLLM, but not necessary for official client. + # NOTE: this has to be true for n > 1 in vLLM, but not necessary + # for official client. 
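[Illustration, not part of the patch series] TEST_REGEX in the hunks above constrains guided decoding to dotted-quad IPv4 addresses. A standalone check of what that pattern accepts, using only Python's re module:

import re

IPV4_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
              r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)")

for candidate in ["192.168.0.1", "255.255.255.255", "256.1.1.1", "1.2.3"]:
    ok = re.fullmatch(IPV4_REGEX, candidate) is not None
    print(f"{candidate!r} -> {ok}")
# The first two match; "256.1.1.1" and "1.2.3" are rejected.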
use_beam_search=True), ) assert len(batch.choices) == 4 @@ -469,8 +477,8 @@ async def test_logits_bias(server, client: openai.AsyncOpenAI): async def test_guided_json_completion(server, client: openai.AsyncOpenAI): completion = await client.completions.create( model=MODEL_NAME, - prompt= - f"Give an example JSON for an employee profile that fits this schema: {TEST_SCHEMA}", + prompt=f"Give an example JSON for an employee profile " + f"that fits this schema: {TEST_SCHEMA}", n=3, temperature=1.0, max_tokens=500, @@ -489,9 +497,11 @@ async def test_guided_json_chat(server, client: openai.AsyncOpenAI): "role": "system", "content": "you are a helpful assistant" }, { - "role": "user", - "content": "Give an example JSON for an employee profile that " + \ - f"fits this schema: {TEST_SCHEMA}" + "role": + "user", + "content": + f"Give an example JSON for an employee profile that " + f"fits this schema: {TEST_SCHEMA}" }] chat_completion = await client.chat.completions.create( model=MODEL_NAME, diff --git a/tests/kernels/test_moe.py b/tests/kernels/test_moe.py index c402fe3e98c7f..6165225d2d819 100644 --- a/tests/kernels/test_moe.py +++ b/tests/kernels/test_moe.py @@ -57,7 +57,8 @@ def test_fused_moe( [torch.float32, torch.float16, torch.bfloat16]) @torch.inference_mode() def test_mixtral_moe(dtype: torch.dtype): - "Make sure our Mixtral MoE implementation agrees with the one from huggingface." + """Make sure our Mixtral MoE implementation agrees with the one from + huggingface.""" # Instantiate our and huggingface's MoE blocks config = MixtralConfig() diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index e881cd1ec3753..a0be658acac7b 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -114,7 +114,8 @@ def test_contexted_kv_attention( v_cache = v_cache.view(-1, block_size, num_kv_heads, head_size).permute(0, 2, 3, 1).contiguous() - # Warm up the Triton kernel by calling it once before actually measuring generation time + # Warm up the Triton kernel by calling it once before actually measuring + # generation time context_attention_fwd(query, k, v, output, k_cache, v_cache, block_table, b_start_loc, b_seq_len, b_ctx_len, max_input_len) torch.cuda.synchronize() diff --git a/tests/lora/test_layer_variation.py b/tests/lora/test_layer_variation.py index 1a1da517b2276..95cf0cede8729 100644 --- a/tests/lora/test_layer_variation.py +++ b/tests/lora/test_layer_variation.py @@ -11,9 +11,9 @@ MODEL_PATH = "Felladrin/Llama-68M-Chat-v1" PROMPTS = [ - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. 
[/user] [assistant]", - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]", + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", # noqa: E501 + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. 
[/user] [assistant]", # noqa: E501 + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]", # noqa: E501 ] diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 18ce300449dbf..46f054c5b84ef 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -17,14 +17,16 @@ LoRAMapping, BaseLayerWithLoRA, ) -from vllm.lora.models import LoRALayerWeights, convert_mapping, PackedLoRALayerWeights +from vllm.lora.models import (LoRALayerWeights, convert_mapping, + PackedLoRALayerWeights) from vllm.config import LoRAConfig from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, RowParallelLinear, QKVParallelLinear) -from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding, ParallelLMHead +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding, ParallelLMHead) from vllm.model_executor.utils import set_random_seed from .utils import DummyLoRAManager @@ -258,7 +260,8 @@ def create_random_embedding_layer(): @torch.inference_mode() -# @pytest.mark.skip(reason="Fails when loras are in any slot other than the first.") +# @pytest.mark.skip( +# reason="Fails when loras are in any slot other than the first.") @pytest.mark.parametrize("num_loras", [1, 2, 4, 8]) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_embeddings_with_new_embeddings(dist_init, num_loras, device) -> None: @@ -674,9 +677,9 @@ class FakeConfig: result = linear(input_)[0] subloras = sublora_dict[lora_id] for i, sublora in enumerate(subloras): - result[:, sublora.lora_b.shape[1] * i:sublora.lora_b.shape[1] * ( - i + 1 - )] += input_ @ sublora.lora_a @ sublora.lora_b * sublora.scaling + result[:, sublora.lora_b.shape[1] * i:sublora.lora_b.shape[1] * + (i + 1)] += (input_ @ sublora.lora_a @ sublora.lora_b * + sublora.scaling) expected_results.append(result) expected_result = torch.cat(expected_results) diff --git a/tests/lora/test_llama.py b/tests/lora/test_llama.py index dfaf8c700695a..130906c3d584d 100644 --- a/tests/lora/test_llama.py +++ b/tests/lora/test_llama.py @@ -10,12 +10,12 @@ def do_sample(llm, lora_path: str, lora_id: int): prompts = [ - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the 
elector what is under nationality? [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? [/user] [assistant]", - "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]" + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", # noqa: E501 + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", # noqa: E501 + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_95 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a low tone mora with a gloss of /˩okiru/ [òkìɽɯ́]? [/user] [assistant]", # noqa: E501 + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. [/user] [assistant]", # noqa: E501 + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? 
[/user] [assistant]", # noqa: E501 + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]" # noqa: E501 ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256, @@ -48,20 +48,20 @@ def test_llama_lora(sql_lora_files, tp_size): tensor_parallel_size=tp_size) expected_no_lora_output = [ - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_75 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_76 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_77 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_78 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user]", - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? ", - "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? 
", - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", + "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_75 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_76 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_77 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_78 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user]", # noqa: E501 + " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? ", # noqa: E501 + "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", # noqa: E501 + " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", # noqa: E501 + " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? 
", # noqa: E501 + "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", # noqa: E501 ] expected_lora_output = [ - " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", - " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", - " SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ", - " SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ", - " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", - " SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' " + " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", # noqa: E501 + " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", # noqa: E501 + " SELECT one_mora FROM table_name_95 WHERE gloss = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] AND accented_mora = 'low tone mora with a gloss of /˩okiru/' [òkìɽɯ́] ", # noqa: E501 + " SELECT sex FROM people WHERE people_id IN (SELECT people_id FROM candidate GROUP BY sex ORDER BY COUNT(people_id) DESC LIMIT 1) ", # noqa: E501 + " SELECT pick FROM table_name_60 WHERE former_wnba_team = 'Minnesota Lynx' ", # noqa: E501 + " SELECT womens_doubles FROM table_28138035_4 WHERE mens_singles = 'Werner Schlager' " # noqa: E501 ] print("lora adapter created") @@ -121,7 +121,8 @@ def test_llama_tensor_parallel_equality(sql_lora_files): def test_llama_lora_warmup(sql_lora_files): - """Test that the LLM initialization works with a warmup LORA path and is more conservative""" + """Test that the LLM initialization works with a warmup LORA path and + is more conservative""" @ray.remote(num_gpus=1) def get_num_gpu_blocks_lora(): @@ -132,13 +133,15 @@ def get_num_gpu_blocks_lora(): @ray.remote(num_gpus=1) def get_num_gpu_blocks_no_lora(): llm = vllm.LLM(MODEL_PATH, max_num_seqs=16) - num_gpu_blocks_no_lora_warmup = llm.llm_engine.cache_config.num_gpu_blocks + num_gpu_blocks_no_lora_warmup = ( + llm.llm_engine.cache_config.num_gpu_blocks) return num_gpu_blocks_no_lora_warmup num_gpu_blocks_lora_warmup = ray.get(get_num_gpu_blocks_lora.remote()) num_gpu_blocks_no_lora_warmup = ray.get( get_num_gpu_blocks_no_lora.remote()) assert num_gpu_blocks_lora_warmup < num_gpu_blocks_no_lora_warmup, ( - "The warmup with lora should be more" - " conservative than without lora, therefore the number of memory blocks for the KV cache should be " + "The warmup with lora should be more " + "conservative than without lora, therefore the number of " + "memory blocks for the KV cache should be " "less 
when using lora than when not using lora") diff --git a/tests/lora/test_mixtral.py b/tests/lora/test_mixtral.py index e45fb92ab7edf..4d74722aaa926 100644 --- a/tests/lora/test_mixtral.py +++ b/tests/lora/test_mixtral.py @@ -9,9 +9,9 @@ def do_sample(llm, lora_path: str, lora_id: int): prompts = [ - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", - "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]", + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. 
This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]", # noqa: E501 + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]", # noqa: E501 + "[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. 
[/user] [assistant]", # noqa: E501 ] sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256) outputs = llm.generate( @@ -42,9 +42,9 @@ def test_mixtral_lora(mixtral_lora_files, tp_size): worker_use_ray=True) expected_lora_output = [ - "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", - "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", - "inform(name[BioShock], release_year[2007], rating[good], genres[action-adventure, role-playing, shooter], platforms[PlayStation, Xbox, PC], available_on_steam[yes], has_linux_release[no], has_mac_release[yes])", + "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", # noqa: E501 + "give_opinion(name[SpellForce 3], release_year[2017], developer[Grimlore Games], rating[poor])", # noqa: E501 + "inform(name[BioShock], release_year[2007], rating[good], genres[action-adventure, role-playing, shooter], platforms[PlayStation, Xbox, PC], available_on_steam[yes], has_linux_release[no], has_mac_release[yes])", # noqa: E501 ] assert do_sample(llm, mixtral_lora_files, diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py index 410bdfa5c69e2..0ab9c63ce4377 100644 --- a/tests/metrics/test_metrics.py +++ b/tests/metrics/test_metrics.py @@ -21,7 +21,8 @@ def test_metric_counter_prompt_tokens( gpu_memory_utilization=0.4) tokenizer = vllm_model.model.get_tokenizer() prompt_token_counts = [len(tokenizer.encode(p)) for p in example_prompts] - # This test needs at least 2 prompts in a batch of different lengths to verify their token count is correct despite padding. + # This test needs at least 2 prompts in a batch of different lengths to + # verify their token count is correct despite padding. assert len(example_prompts) > 1, "at least 2 prompts are required" assert prompt_token_counts[0] != prompt_token_counts[1], ( "prompts of different lengths are required") @@ -33,8 +34,8 @@ def test_metric_counter_prompt_tokens( **stat_logger.labels)._value.get() assert vllm_prompt_token_count == metric_count, ( - f"prompt token count: {vllm_prompt_token_count!r}\nmetric: {metric_count!r}" - ) + f"prompt token count: {vllm_prompt_token_count!r}\n" + f"metric: {metric_count!r}") @pytest.mark.parametrize("model", MODELS) @@ -60,9 +61,10 @@ def test_metric_counter_generation_tokens( for i in range(len(example_prompts)): vllm_output_ids, vllm_output_str = vllm_outputs[i] prompt_ids = tokenizer.encode(example_prompts[i]) - # vllm_output_ids contains both prompt tokens and generation tokens. We're interested only in the count of the generation tokens. + # vllm_output_ids contains both prompt tokens and generation tokens. + # We're interested only in the count of the generation tokens. vllm_generation_count += len(vllm_output_ids) - len(prompt_ids) assert vllm_generation_count == metric_count, ( - f"generation token count: {vllm_generation_count!r}\nmetric: {metric_count!r}" - ) + f"generation token count: {vllm_generation_count!r}\n" + f"metric: {metric_count!r}") diff --git a/tests/models/test_marlin.py b/tests/models/test_marlin.py index f3cc517364f06..a3a1487e62e05 100644 --- a/tests/models/test_marlin.py +++ b/tests/models/test_marlin.py @@ -1,7 +1,7 @@ """Compare the outputs of a GPTQ model to a Marlin model. -Note: GPTQ and Marlin do not have bitwise correctness. -As a result, in this test, we just confirm that the top selected tokens of the +Note: GPTQ and Marlin do not have bitwise correctness. 
+As a result, in this test, we just confirm that the top selected tokens of the Marlin/GPTQ models are in the top 3 selections of each other. Note: Marlin internally uses locks to synchronize the threads. This can @@ -14,7 +14,8 @@ import pytest import torch from dataclasses import dataclass -from vllm.model_executor.layers.quantization import _QUANTIZATION_CONFIG_REGISTRY +from vllm.model_executor.layers.quantization import ( + _QUANTIZATION_CONFIG_REGISTRY) capability = torch.cuda.get_device_capability() capability = capability[0] * 10 + capability[1] @@ -87,11 +88,11 @@ def test_models( if marlin_output_id != gptq_output_id: # Each predicted token must be in top 5 of the other's assert gptq_output_id in marlin_logprobs[idx], ( - f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\nMarlin:\t{marlin_output_str!r}" - ) + f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\n" + f"Marlin:\t{marlin_output_str!r}") assert marlin_output_id in gptq_logprobs[idx], ( - f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\nMarlin:\t{marlin_output_str!r}" - ) + f"Test{prompt_idx}:\nGPTQ:\t{gptq_output_str!r}\n" + f"Marlin:\t{marlin_output_str!r}") # Break out since sequences will now diverge. break diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py index 7ef8dde7bb8f6..c83551c36ef10 100644 --- a/tests/prefix_caching/test_prefix_caching.py +++ b/tests/prefix_caching/test_prefix_caching.py @@ -20,20 +20,23 @@ def test_block_allocator( num_blocks, enable_caching=True) - # Allocate two PysicalTokenBlocks with the same hash and check that they are the same PhysicalTokenBlock + # Allocate two PysicalTokenBlocks with the same hash and check + # that they are the same PhysicalTokenBlock first_block = block_allocator.allocate(block_hash, 0) second_block = block_allocator.allocate(block_hash, 0) assert (first_block == second_block) assert (second_block.ref_count == 2) - # Free the first_block and confirm that the ref_count is correctly decremented on the second block + # Free the first_block and confirm that the ref_count is correctly + # decremented on the second block block_allocator.free(first_block) assert (second_block.ref_count == 1) # Free the second block block_allocator.free(second_block) - # Reallocate the first block and confirm that, even after the block had its ref_count go to 0, we still get the same block back + # Reallocate the first block and confirm that, even after the block + # had its ref_count go to 0, we still get the same block back first_block = block_allocator.allocate(block_hash, 0) assert (first_block == second_block) assert (first_block.block_hash == block_hash) @@ -56,7 +59,8 @@ def test_eviction(num_blocks: int, ): for block in blocks: block_allocator.free(block) - # Allocate a new block and confirm that it's the first block freed. I.E The Least Recently Used block + # Allocate a new block and confirm that it's the first block freed. 
+ # I.E The Least Recently Used block new_block_hash = block_size new_block = block_allocator.allocate(new_block_hash, 0) assert (new_block == blocks[0]) @@ -68,7 +72,8 @@ def test_eviction(num_blocks: int, ): assert (realloc_block == blocks[realloc_block_hash]) assert (realloc_block.block_hash == realloc_block_hash) - # Allocate a new block and confirm that it's not the realloc_block, since the realloc_block shouldn't be in the free list + # Allocate a new block and confirm that it's not the realloc_block, + # since the realloc_block shouldn't be in the free list new_block_hash = block_size + 1 new_block = block_allocator.allocate(new_block_hash, 0) assert (realloc_block != new_block) diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py index 1abb55f021214..14f1872c45258 100644 --- a/tests/samplers/test_logprobs.py +++ b/tests/samplers/test_logprobs.py @@ -70,8 +70,8 @@ def test_get_prompt_logprobs( hf_logprob[i][-1][token_id].item(), atol=1e-2, rtol=1e-2) - assert isinstance(sample_logprob.decoded_token, str), \ - ("The token should be decoded by the time it is returned " + assert isinstance(sample_logprob.decoded_token, str), ( + "The token should be decoded by the time it is returned " " to the user.") diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index 31e865f42ff3b..1bc8703d1a8e0 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -255,9 +255,10 @@ def test_sampling(model_runner: ModelRunner): if metadata.sampling_params.use_beam_search: continue - if metadata.sampling_params.seed is not None \ - and expected_tokens[i] is None: - # Record seeded random result to compare with results of second invocation + if (metadata.sampling_params.seed is not None + and expected_tokens[i] is None): + # Record seeded random result to compare with results of + # second invocation expected_tokens[i] = [ nth_output.output_token for nth_output in sequence_output.samples @@ -265,11 +266,13 @@ def test_sampling(model_runner: ModelRunner): continue for n, nth_output in enumerate(sequence_output.samples): - if metadata.sampling_params.temperature == 0 or metadata.sampling_params.seed is not None: + if (metadata.sampling_params.temperature == 0 + or metadata.sampling_params.seed is not None): # Ensure exact matches for greedy or random with seed assert nth_output.output_token == expected_tokens[i][n] else: - # For non-seeded random check that one of the high-logit tokens were chosen + # For non-seeded random check that one of the high-logit + # tokens were chosen assert nth_output.output_token in expected_tokens[i] # Test batch @@ -284,8 +287,8 @@ def test_sampling(model_runner: ModelRunner): input_tensor.data = input_tensor.index_select(0, target_index) fake_logits.data = fake_logits.index_select(0, target_index) - # This time, results of seeded random samples will be compared with the corresponding - # sample in the pre-shuffled batch + # This time, results of seeded random samples will be compared with + # the corresponding sample in the pre-shuffled batch test_sampling(model_runner) del model_runner diff --git a/tests/spec_decode/test_metrics.py b/tests/spec_decode/test_metrics.py index 941ea37aa81e0..09847136d13e9 100644 --- a/tests/spec_decode/test_metrics.py +++ b/tests/spec_decode/test_metrics.py @@ -150,8 +150,10 @@ def test_initial_metrics_has_correct_values(has_data: bool): assert metrics.emitted_tokens == num_emitted_tokens if has_data: - assert metrics.draft_acceptance_rate == num_accepted_tokens / 
num_draft_tokens - assert metrics.system_efficiency == num_emitted_tokens / num_possible_tokens + assert (metrics.draft_acceptance_rate == num_accepted_tokens / + num_draft_tokens) + assert (metrics.system_efficiency == num_emitted_tokens / + num_possible_tokens) else: assert math.isnan(metrics.draft_acceptance_rate) assert math.isnan(metrics.system_efficiency) diff --git a/tests/spec_decode/test_multi_step_worker.py b/tests/spec_decode/test_multi_step_worker.py index 88bb7c293fe95..45b43ec59ee8f 100644 --- a/tests/spec_decode/test_multi_step_worker.py +++ b/tests/spec_decode/test_multi_step_worker.py @@ -3,7 +3,8 @@ import pytest from unittest.mock import MagicMock -from vllm.spec_decode.multi_step_worker import MultiStepWorker, DraftModelTop1Proposer +from vllm.spec_decode.multi_step_worker import (MultiStepWorker, + DraftModelTop1Proposer) from vllm.worker.worker import Worker from vllm.model_executor.utils import set_random_seed from vllm.sequence import SamplerOutput diff --git a/tests/spec_decode/test_spec_decode_worker.py b/tests/spec_decode/test_spec_decode_worker.py index e919711c3ed2c..bfc69e01e3eb9 100644 --- a/tests/spec_decode/test_spec_decode_worker.py +++ b/tests/spec_decode/test_spec_decode_worker.py @@ -4,12 +4,15 @@ from unittest.mock import MagicMock from vllm.spec_decode.multi_step_worker import MultiStepWorker -from vllm.spec_decode.spec_decode_worker import SpecDecodeWorker, split_num_cache_blocks_evenly +from vllm.spec_decode.spec_decode_worker import (SpecDecodeWorker, + split_num_cache_blocks_evenly) from vllm.spec_decode.interfaces import SpeculativeProposals from vllm.model_executor.utils import set_random_seed from vllm.model_executor.layers.rejection_sampler import RejectionSampler -from .utils import mock_worker, create_batch, ExecuteModelData, create_sampler_output_list -from vllm.spec_decode.metrics import SpecDecodeWorkerMetrics, AsyncMetricsCollector +from .utils import (mock_worker, create_batch, ExecuteModelData, + create_sampler_output_list) +from vllm.spec_decode.metrics import (SpecDecodeWorkerMetrics, + AsyncMetricsCollector) @pytest.mark.parametrize('k', [1, 2, 6]) @@ -391,13 +394,15 @@ def test_collects_metrics(k: int, batch_size: int, returns_metrics: bool): mock_rejsample_metrics = MagicMock( spec=SpecDecodeWorkerMetrics) if returns_metrics else None - metrics_collector.maybe_collect_rejsample_metrics.return_value = mock_rejsample_metrics + metrics_collector.maybe_collect_rejsample_metrics.return_value = ( + mock_rejsample_metrics) output = worker.execute_model(**execute_model_data.to_dict(), num_spec_tokens=k) assert output[0].spec_decode_worker_metrics == mock_rejsample_metrics - call_args_list = metrics_collector.maybe_collect_rejsample_metrics.call_args_list + call_args_list = ( + metrics_collector.maybe_collect_rejsample_metrics.call_args_list) assert len(call_args_list) == 1 args, kwargs = call_args_list[0] assert args[0] == k or kwargs.get('k', -1) == k @@ -547,7 +552,8 @@ def test_profile_num_available_blocks(available_gpu_blocks: int, target_worker.profile_num_available_blocks.return_value = ( available_gpu_blocks, available_cpu_blocks) - target_worker.get_cache_block_size_bytes.return_value = target_cache_block_size_bytes + target_worker.get_cache_block_size_bytes.return_value = ( + target_cache_block_size_bytes) draft_worker.get_cache_block_size_bytes.return_value = draft_kv_size_bytes worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, diff --git a/vllm/config.py b/vllm/config.py index 
ef9a920f29c2a..e893fe702c975 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -45,7 +45,7 @@ class ModelConfig: a tag name, or a commit id. If unspecified, will use the default version. code_revision: The specific revision to use for the model code on - Hugging Face Hub. It can be a branch name, a tag name, or a + Hugging Face Hub. It can be a branch name, a tag name, or a commit id. If unspecified, will use the default version. tokenizer_revision: The specific tokenizer version to use. It can be a branch name, a tag name, or a commit id. If unspecified, will use @@ -189,8 +189,8 @@ def _verify_quantization(self) -> None: if is_hip( ) and self.quantization in rocm_not_supported_quantization: raise ValueError( - f"{self.quantization} quantization is currently not supported " - f"in ROCm.") + f"{self.quantization} quantization is currently not " + f"supported in ROCm.") if self.quantization != "marlin": logger.warning( f"{self.quantization} quantization is not fully " @@ -321,7 +321,8 @@ def __init__( self.num_cpu_blocks = None def metrics_info(self): - # convert cache_config to dict(key: str, value: str) for prometheus metrics info + # convert cache_config to dict(key: str, value: str) for prometheus + # metrics info return {key: str(value) for key, value in self.__dict__.items()} def _verify_args(self) -> None: @@ -399,8 +400,9 @@ def __init__( ) -> None: self.pipeline_parallel_size = pipeline_parallel_size if is_neuron(): - # For Neuron device support, here we assign TP=1 to avoid sharding within vLLM directly. - # Transformer-neuronx would take neuron_tp_degree attribute, and distribute the workload + # For Neuron device support, here we assign TP=1 to avoid sharding + # within vLLM directly. Transformer-neuronx would take + # neuron_tp_degree attribute, and distribute the workload # to multiple NeuronCores. self.tensor_parallel_size = 1 self.neuron_tp_degree = tensor_parallel_size diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 52b120f227eda..8bfc14999f0a7 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -95,13 +95,15 @@ def free(self, block: PhysicalTokenBlock) -> None: del self.cached_blocks[block.block_hash] def get_num_free_blocks(self) -> int: - return self.num_blocks - self.current_num_blocks + self.evictor.num_blocks + return (self.num_blocks - self.current_num_blocks + + self.evictor.num_blocks) def contains_block(self, block_hash: int) -> bool: return block_hash in self.cached_blocks or block_hash in self.evictor def update_hash(self, block_hash: int, block: PhysicalTokenBlock): - # If caching is enabled, update the hash of block and the cached_blocks dictionary. + # If caching is enabled, update the hash of block and the + # cached_blocks dictionary. 
if self.enable_caching: assert not self.contains_block(block_hash) old_hash = block.block_hash @@ -218,10 +220,12 @@ def _promote_last_block( seq: Sequence, last_block: PhysicalTokenBlock, ) -> PhysicalTokenBlock: - # Compute a new hash for the block so that it can be shared by other Sequences + # Compute a new hash for the block so that it can be shared by + # other Sequences new_hash = seq.hash_of_block(len(seq.logical_token_blocks) - 1) - # if new_hash is already in the cached table, then free last_block and return the cached version + # if new_hash is already in the cached table, then free last_block + # and return the cached version if self.gpu_allocator.contains_block(new_hash): self.gpu_allocator.free(last_block) return self.gpu_allocator.allocate(new_hash) @@ -289,7 +293,8 @@ def append_slot( assert last_block.device == Device.GPU if last_block.ref_count == 1: # Not shared with other sequences. Appendable. - # If the last block is now complete, promote it to a full block so that it can be shared + # If the last block is now complete, promote it to a full block so + # that it can be shared new_block = self._maybe_promote_last_block(seq, last_block) block_table[-1] = new_block return None diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py index b538ea574b604..1d81f5a97d71c 100644 --- a/vllm/core/evictor.py +++ b/vllm/core/evictor.py @@ -39,9 +39,9 @@ def add(self, block: PhysicalTokenBlock): @abstractmethod def remove(self, block_hash: int) -> PhysicalTokenBlock: """Simply removes the block with the hash value block_hash from the - evictor. Caller is responsible for making sure that block_hash is contained - in the evictor before calling remove. Should be used to "bring back" blocks - that have been freed but not evicted yet. + evictor. Caller is responsible for making sure that block_hash is + contained in the evictor before calling remove. Should be used to + "bring back" blocks that have been freed but not evicted yet. """ pass diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index c96c6d62ef19d..9255f91be55cb 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -214,8 +214,8 @@ def _schedule(self) -> SchedulerOutputs: lora_int_id = 0 if self.lora_enabled: lora_int_id = seq_group.lora_int_id - if lora_int_id > 0 and lora_int_id not in curr_loras and len( - curr_loras) >= self.lora_config.max_loras: + if (lora_int_id > 0 and lora_int_id not in curr_loras + and len(curr_loras) >= self.lora_config.max_loras): # We don't have a space for another LoRA, so # we ignore this request for now. leftover_waiting_sequences.appendleft(seq_group) @@ -309,8 +309,8 @@ def _schedule(self) -> SchedulerOutputs: lora_int_id = 0 if self.lora_enabled: lora_int_id = seq_group.lora_int_id - if lora_int_id > 0 and lora_int_id not in curr_loras and len( - curr_loras) >= self.lora_config.max_loras: + if (lora_int_id > 0 and lora_int_id not in curr_loras + and len(curr_loras) >= self.lora_config.max_loras): # We don't have a space for another LoRA, so # we ignore this request for now. 
leftover_swapped.appendleft(seq_group) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 5b46d9db5649a..6e045cd6d73c6 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -100,7 +100,8 @@ def __init__( f"download_dir={model_config.download_dir!r}, " f"load_format={model_config.load_format}, " f"tensor_parallel_size={parallel_config.tensor_parallel_size}, " - f"disable_custom_all_reduce={parallel_config.disable_custom_all_reduce}, " + f"disable_custom_all_reduce=" + f"{parallel_config.disable_custom_all_reduce}, " f"quantization={model_config.quantization}, " f"enforce_eager={model_config.enforce_eager}, " f"kv_cache_dtype={cache_config.cache_dtype}, " @@ -929,7 +930,8 @@ def _get_stats(self, # Latency Timings. time_last_iters = [] for seq_group in scheduler_outputs.scheduled_seq_groups: - # Time since last token. (n.b. updates seq_group.metrics.last_token_time) + # Time since last token. + # (n.b. updates seq_group.metrics.last_token_time) time_last_iters.append(seq_group.get_last_latency(now)) # Time since arrival for all finished requests. if seq_group.is_finished(): @@ -961,16 +963,17 @@ def _decode_logprobs(self, seq: Sequence, prms: SamplingParams, for token_id, sample_logprob in logprobs.items(): if (sample_logprob.decoded_token is None and token_id != -1): all_input_ids_with_logprob = all_input_ids[:-1] + [token_id] - _, new_text, prefix_offset, read_offset = detokenize_incrementally( - self.get_tokenizer_for_seq(seq), - all_input_ids=all_input_ids_with_logprob, - prev_tokens=seq.tokens, - prefix_offset=seq.prefix_offset, - read_offset=seq.read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms. - spaces_between_special_tokens, - ) + (_, new_text, prefix_offset, + read_offset) = detokenize_incrementally( + self.get_tokenizer_for_seq(seq), + all_input_ids=all_input_ids_with_logprob, + prev_tokens=seq.tokens, + prefix_offset=seq.prefix_offset, + read_offset=seq.read_offset, + skip_special_tokens=prms.skip_special_tokens, + spaces_between_special_tokens=prms. + spaces_between_special_tokens, + ) sample_logprob.decoded_token = new_text def _decode_sequence(self, seq: Sequence, prms: SamplingParams) -> None: diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index d31542159e4a4..17b1852f5b0a3 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -1,5 +1,6 @@ from vllm.logger import init_logger -from prometheus_client import Counter, Gauge, Histogram, Info, REGISTRY, disable_created_metrics +from prometheus_client import (Counter, Gauge, Histogram, Info, REGISTRY, + disable_created_metrics) import time import numpy as np @@ -177,10 +178,12 @@ def _log_prometheus(self, stats: Stats) -> None: def _log_prometheus_interval(self, prompt_throughput: float, generation_throughput: float) -> None: # Logs metrics to prometheus that are computed every logging_interval. - # Support legacy gauge metrics that make throughput calculations on the vLLM side. - # Moving forward, we should use counters like counter_prompt_tokens, counter_generation_tokens - # Which log raw data and calculate summaries using rate() on the grafana/prometheus side. - # See https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666 + # Support legacy gauge metrics that make throughput calculations on + # the vLLM side. Moving forward, we should use counters like + # counter_prompt_tokens, counter_generation_tokens + # Which log raw data and calculate summaries using rate() on the + # grafana/prometheus side. 
See + # https://github.com/vllm-project/vllm/pull/2316#discussion_r1464204666 self.metrics.gauge_avg_prompt_throughput.labels( **self.labels).set(prompt_throughput) self.metrics.gauge_avg_generation_throughput.labels( @@ -188,7 +191,7 @@ def _log_prometheus_interval(self, prompt_throughput: float, def log(self, stats: Stats) -> None: """Called by LLMEngine. - Logs to prometheus and tracked stats every iteration. + Logs to prometheus and tracked stats every iteration. Logs to Stdout every self.local_interval seconds.""" # Log to prometheus. @@ -200,8 +203,8 @@ def log(self, stats: Stats) -> None: # Log locally every local_interval seconds. if self._local_interval_elapsed(stats.now): - - # Compute summary metrics for tracked stats (and log them to promethus if applicable). + # Compute summary metrics for tracked stats (and log them + # to promethus if applicable). prompt_throughput = self._get_throughput(self.num_prompt_tokens, now=stats.now) generation_throughput = self._get_throughput( @@ -213,7 +216,8 @@ def log(self, stats: Stats) -> None: # Log to stdout. logger.info( f"Avg prompt throughput: {prompt_throughput:.1f} tokens/s, " - f"Avg generation throughput: {generation_throughput:.1f} tokens/s, " + f"Avg generation throughput: " + f"{generation_throughput:.1f} tokens/s, " f"Running: {stats.num_running} reqs, " f"Swapped: {stats.num_swapped} reqs, " f"Pending: {stats.num_waiting} reqs, " diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 1eb4ab8b06b64..86b6c4c67cfa4 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -1,7 +1,9 @@ """ -NOTE: This API server is used only for demonstrating usage of AsyncEngine and simple performance benchmarks. -It is not intended for production use. For production use, we recommend using our OpenAI compatible server. -We are also not going to accept PRs modifying this file, please change `vllm/entrypoints/openai/api_server.py` instead. +NOTE: This API server is used only for demonstrating usage of AsyncEngine +and simple performance benchmarks. It is not intended for production use. +For production use, we recommend using our OpenAI compatible server. +We are also not going to accept PRs modifying this file, please +change `vllm/entrypoints/openai/api_server.py` instead. """ import argparse diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 9f29b4ac92f48..00407bc0e809c 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -18,7 +18,9 @@ import vllm from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest, ErrorResponse +from vllm.entrypoints.openai.protocol import (CompletionRequest, + ChatCompletionRequest, + ErrorResponse) from vllm.logger import init_logger from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion @@ -84,13 +86,11 @@ def parse_args(): type=json.loads, default=["*"], help="allowed headers") - parser.add_argument( - "--api-key", - type=str, - default=None, - help= - "If provided, the server will require this key to be presented in the header." 
- ) + parser.add_argument("--api-key", + type=str, + default=None, + help="If provided, the server will require this key " + "to be presented in the header.") parser.add_argument("--served-model-name", type=str, default=None, @@ -103,9 +103,8 @@ def parse_args(): default=None, nargs='+', action=LoRAParserAction, - help= - "LoRA module configurations in the format name=path. Multiple modules can be specified." - ) + help="LoRA module configurations in the format name=path. " + "Multiple modules can be specified.") parser.add_argument("--chat-template", type=str, default=None, @@ -138,9 +137,10 @@ def parse_args(): help="Additional ASGI middleware to apply to the app. " "We accept multiple --middleware arguments. " "The value should be an import path. " - "If a function is provided, vLLM will add it to the server using @app.middleware('http'). " - "If a class is provided, vLLM will add it to the server using app.add_middleware(). " - ) + "If a function is provided, vLLM will add it to the server " + "using @app.middleware('http'). " + "If a class is provided, vLLM will add it to the server " + "using app.add_middleware(). ") parser = AsyncEngineArgs.add_cli_args(parser) return parser.parse_args() @@ -235,9 +235,8 @@ async def authentication(request: Request, call_next): elif inspect.iscoroutinefunction(imported): app.middleware("http")(imported) else: - raise ValueError( - f"Invalid middleware {middleware}. Must be a function or a class." - ) + raise ValueError(f"Invalid middleware {middleware}. " + f"Must be a function or a class.") logger.info(f"vLLM API server version {vllm.__version__}") logger.info(f"args: {args}") diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 7d5603c85e4e9..d2fb9ca001b15 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -12,7 +12,8 @@ UsageInfo) from vllm.outputs import RequestOutput from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA -from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor +from vllm.model_executor.guided_decoding import ( + get_guided_decoding_logits_processor) logger = init_logger(__name__) @@ -37,8 +38,9 @@ async def create_chat_completion( ChatCompletionResponse]: """Completion API similar to OpenAI's API. - See https://platform.openai.com/docs/api-reference/chat/create - for the API specification. This API mimics the OpenAI ChatCompletion API. + See https://platform.openai.com/docs/api-reference/chat/create + for the API specification. This API mimics the OpenAI + ChatCompletion API. NOTE: Currently we do not support the following feature: - function_call (Users should implement this by themselves) @@ -116,7 +118,8 @@ async def chat_completion_stream_generator( # the result_generator, it needs to be sent as the FIRST # response (by the try...catch). 
if first_iteration: - # Send first response for each request.n (index) with the role + # Send first response for each request.n (index) with + # the role role = self.get_chat_request_role(request) for i in range(request.n): choice_data = ChatCompletionResponseStreamChoice( @@ -133,7 +136,8 @@ async def chat_completion_stream_generator( data = chunk.model_dump_json(exclude_unset=True) yield f"data: {data}\n\n" - # Send response to echo the input portion of the last message + # Send response to echo the input portion of the + # last message if request.echo: last_msg_content = "" if request.messages and isinstance( @@ -145,11 +149,12 @@ async def chat_completion_stream_generator( if last_msg_content: for i in range(request.n): - choice_data = ChatCompletionResponseStreamChoice( - index=i, - delta=DeltaMessage( - content=last_msg_content), - finish_reason=None) + choice_data = ( + ChatCompletionResponseStreamChoice( + index=i, + delta=DeltaMessage( + content=last_msg_content), + finish_reason=None)) chunk = ChatCompletionStreamResponse( id=request_id, object=chunk_object_type, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index c673b2582c47b..b78f053800f3c 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -1,7 +1,8 @@ import asyncio import time from fastapi import Request -from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple +from typing import (AsyncGenerator, AsyncIterator, Callable, List, Optional, + Dict, Tuple) from vllm.logger import init_logger from vllm.utils import random_uuid from vllm.engine.async_llm_engine import AsyncLLMEngine @@ -16,7 +17,8 @@ ) from vllm.outputs import RequestOutput from vllm.entrypoints.openai.serving_engine import OpenAIServing, LoRA -from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor +from vllm.model_executor.guided_decoding import ( + get_guided_decoding_logits_processor) logger = init_logger(__name__) @@ -44,9 +46,8 @@ def parse_prompt_format(prompt) -> Tuple[bool, list]: prompt_is_tokens = True prompts = prompt # case 4: array of token arrays else: - raise ValueError( - "prompt must be a string, array of strings, array of tokens, or array of token arrays" - ) + raise ValueError("prompt must be a string, array of strings, " + "array of tokens, or array of token arrays") return prompt_is_tokens, prompts @@ -156,7 +157,8 @@ async def create_completion(self, request: CompletionRequest, int, RequestOutput]] = merge_async_iterators(*generators) # Similar to the OpenAI API, when n != best_of, we do not stream the - # results. In addition, we do not stream the results when use beam search. + # results. In addition, we do not stream the results when use + # beam search. stream = (request.stream and (request.best_of is None or request.n == request.best_of) and not request.use_beam_search) @@ -223,7 +225,8 @@ async def completion_stream_generator( for output in res.outputs: i = output.index + prompt_idx * request.n - # TODO(simon): optimize the performance by avoiding full text O(n^2) sending. + # TODO(simon): optimize the performance by avoiding full + # text O(n^2) sending. 
if request.echo and request.max_tokens == 0: # only return the prompt @@ -231,11 +234,12 @@ async def completion_stream_generator( delta_token_ids = res.prompt_token_ids top_logprobs = res.prompt_logprobs has_echoed[i] = True - elif request.echo and request.max_tokens > 0 and not has_echoed[ - i]: + elif (request.echo and request.max_tokens > 0 + and not has_echoed[i]): # echo the prompt and first token delta_text = res.prompt + output.text - delta_token_ids = res.prompt_token_ids + output.token_ids + delta_token_ids = (res.prompt_token_ids + + output.token_ids) top_logprobs = res.prompt_logprobs + (output.logprobs or []) has_echoed[i] = True @@ -248,7 +252,9 @@ async def completion_stream_generator( i]:] if output.logprobs else None if request.logprobs is not None: - assert top_logprobs is not None, "top_logprobs must be provided when logprobs is requested" + assert top_logprobs is not None, ( + "top_logprobs must be provided when logprobs " + "is requested") logprobs = self._create_logprobs( token_ids=delta_token_ids, top_logprobs=top_logprobs, diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 230d13d97dbba..2db884945c491 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -50,10 +50,12 @@ def __init__(self, except RuntimeError: event_loop = None - if event_loop is not None and event_loop.is_running( - ): # If the current is instanced by Ray Serve, there is already a running event loop + if event_loop is not None and event_loop.is_running(): + # If the current is instanced by Ray Serve, + # there is already a running event loop event_loop.create_task(self._post_init()) - else: # When using single vLLM without engine_use_ray + else: + # When using single vLLM without engine_use_ray asyncio.run(self._post_init()) async def _post_init(self): @@ -178,8 +180,9 @@ def _validate_prompt_and_tokenize( if token_num + request.max_tokens > self.max_model_len: raise ValueError( - f"This model's maximum context length is {self.max_model_len} tokens. " - f"However, you requested {request.max_tokens + token_num} tokens " + f"This model's maximum context length is " + f"{self.max_model_len} tokens. However, you requested " + f"{request.max_tokens + token_num} tokens " f"({token_num} in the messages, " f"{request.max_tokens} in the completion). 
" f"Please reduce the length of the messages or completion.", ) diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py index e667d70f71e39..99e6cdeee6364 100644 --- a/vllm/lora/layers.py +++ b/vllm/lora/layers.py @@ -20,10 +20,12 @@ RowParallelLinear, QKVParallelLinear, MergedColumnParallelLinear) -from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding, ParallelLMHead +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding, ParallelLMHead) from vllm.model_executor.parallel_utils.parallel_state import ( get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) -from vllm.model_executor.parallel_utils.utils import split_tensor_along_last_dim +from vllm.model_executor.parallel_utils.utils import ( + split_tensor_along_last_dim) if TYPE_CHECKING: pass @@ -84,7 +86,8 @@ def _apply_lora_packed_nslice( lora_b_stacked: 3 element tuple of (num_loras, output_dim, lora_rank) indices: (batch_size) output: (batch_size, q_slice_size + 2*kv_slice_size) - output_slices: n-1 element tuple of (slice_size...), where n is number of slices + output_slices: n-1 element tuple of (slice_size...), + where n is number of slices """ org_output = output x = x.view(-1, x.shape[-1]) @@ -819,9 +822,8 @@ def create_lora_weights( ) -> None: # Keep this in sync with csrc/punica/bgmv/bgmv_config.h if 32000 < self.base_layer.vocab_size > 33024: - raise ValueError( - "When using LoRA, vocab size must be 32000 >= vocab_size <= 33024" - ) + raise ValueError("When using LoRA, vocab size must be " + "32000 >= vocab_size <= 33024") self.lora_a_stacked = torch.zeros( ( max_loras, diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 7386d21c58e4e..238da256b7cdc 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -13,7 +13,8 @@ from vllm.config import LoRAConfig from vllm.utils import LRUCache, in_wsl -from vllm.lora.layers import BaseLayerWithLoRA, LoRAMapping, from_layer, from_layer_sampler +from vllm.lora.layers import (BaseLayerWithLoRA, LoRAMapping, from_layer, + from_layer_sampler) from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.utils import parse_fine_tuned_lora_name, replace_submodule diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py index 7e92bc93ab472..911115d63a639 100644 --- a/vllm/lora/worker_manager.py +++ b/vllm/lora/worker_manager.py @@ -154,10 +154,9 @@ def _load_lora(self, lora_request: LoRARequest) -> LoRAModel: f"LoRA rank {lora.rank} is greater than max_lora_rank " f"{self.lora_config.max_lora_rank}.") if lora.extra_vocab_size > self.lora_config.lora_extra_vocab_size: - raise ValueError( - f"LoRA added vocab size {lora.extra_vocab_size} is greater than " - f"lora_extra_vocab_size {self.lora_config.lora_extra_vocab_size}." 
- ) + raise ValueError(f"LoRA added vocab size {lora.extra_vocab_size} " + f"is greater than lora_extra_vocab_size " + f"{self.lora_config.lora_extra_vocab_size}.") return lora def add_dummy_lora(self, lora_request: LoRARequest, rank: int) -> bool: diff --git a/vllm/model_executor/guided_decoding.py b/vllm/model_executor/guided_decoding.py index a8573f8bdc6c8..00984460d79a6 100644 --- a/vllm/model_executor/guided_decoding.py +++ b/vllm/model_executor/guided_decoding.py @@ -8,8 +8,10 @@ from typing import Union, Tuple from pydantic import BaseModel -from vllm.entrypoints.openai.protocol import CompletionRequest, ChatCompletionRequest -from vllm.model_executor.guided_logits_processors import JSONLogitsProcessor, RegexLogitsProcessor +from vllm.entrypoints.openai.protocol import (CompletionRequest, + ChatCompletionRequest) +from vllm.model_executor.guided_logits_processors import (JSONLogitsProcessor, + RegexLogitsProcessor) class GuidedDecodingMode(Enum): diff --git a/vllm/model_executor/guided_logits_processors.py b/vllm/model_executor/guided_logits_processors.py index 1b3e5e71a5911..76d41aa37dd7b 100644 --- a/vllm/model_executor/guided_logits_processors.py +++ b/vllm/model_executor/guided_logits_processors.py @@ -107,12 +107,15 @@ def __init__(self, Parameters ---------- schema - A JSON schema that encodes the structure we want the model to generate + A JSON schema that encodes the structure we want the model to + generate tokenizer The model's tokenizer whitespace_pattern - Pattern to use for JSON syntactic whitespace (doesn't impact string literals) - Example: allow only a single space or newline with `whitespace_pattern=r"[\n ]?"` + Pattern to use for JSON syntactic whitespace (doesn't impact + string literals) + Example: allow only a single space or newline with + `whitespace_pattern=r"[\n ]?"` """ if isinstance(schema, type(BaseModel)): schema_str = json.dumps(schema.model_json_schema()) @@ -122,8 +125,8 @@ def __init__(self, schema_str = schema else: raise ValueError( - f"Cannot parse schema {schema}. The schema must be either " + - "a Pydantic object, a dictionary or a string that contains the JSON " - + "Schema specification") + f"Cannot parse schema {schema}. 
The schema must be either " + f"a Pydantic object, a dictionary or a string that contains " + f"the JSON Schema specification") regex_string = build_regex_from_schema(schema_str, whitespace_pattern) super().__init__(regex_string, tokenizer) diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py index 724dd0511c5aa..4b63b9eaf59a7 100644 --- a/vllm/model_executor/layers/attention/attention.py +++ b/vllm/model_executor/layers/attention/attention.py @@ -35,12 +35,12 @@ def __init__( ) -> None: super().__init__() if _use_flash_attn(): - from vllm.model_executor.layers.attention.backends.flash_attn import FlashAttentionBackend + from vllm.model_executor.layers.attention.backends.flash_attn import FlashAttentionBackend # noqa: E501 self.backend = FlashAttentionBackend(num_heads, head_size, scale, num_kv_heads, alibi_slopes, sliding_window) else: - from vllm.model_executor.layers.attention.backends.xformers import XFormersBackend + from vllm.model_executor.layers.attention.backends.xformers import XFormersBackend # noqa: E501 self.backend = XFormersBackend(num_heads, head_size, scale, num_kv_heads, alibi_slopes, sliding_window) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 08e3c2d5b706e..3e6dd0dfe2eb3 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -30,9 +30,10 @@ def fused_moe_kernel( K, EM, num_valid_tokens, - # The stride variables represent how much to increase the ptr by when moving by 1 - # element in a particular dimension. E.g. `stride_am` is how much to increase `a_ptr` - # by to get the element one row down (A has M rows). + # The stride variables represent how much to increase the ptr by when + # moving by 1 element in a particular dimension. E.g. `stride_am` is + # how much to increase `a_ptr` by to get the element one row down + # (A has M rows). stride_am, stride_ak, stride_be, @@ -50,17 +51,30 @@ def fused_moe_kernel( compute_type: tl.constexpr, ): """ - Implements the fused computation for a Mixture of Experts (MOE) using token and expert matrices. + Implements the fused computation for a Mixture of Experts (MOE) using + token and expert matrices. Key Parameters: - - A: The input tensor representing tokens with shape (*, K), where '*' can be any shape representing batches and K is the feature dimension of each token. - - B: The stacked MOE weight tensor with shape (E, N, K), where E is the number of experts, K is the input feature dimension, and N is the output feature dimension. - - C: The output cache tensor with shape (M, topk, N), where M is the total number of tokens post padding, topk is the number of times each token is repeated, - and N is the output feature dimension. - - sorted_token_ids: A tensor containing the sorted indices of tokens, repeated topk times and arranged by the expert index they are assigned to. - - expert_ids: A tensor containing the indices of the expert for each block. It determines which expert matrix from B should be used for each block in A. - This kernel performs the multiplication of a token by its corresponding expert matrix as determined by `expert_ids`. The sorting of `sorted_token_ids` - by expert index and padding ensures divisibility by BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix multiplication across different blocks processed by the same expert. 
+ - A: The input tensor representing tokens with shape (*, K), where '*' can + be any shape representing batches and K is the feature dimension of + each token. + - B: The stacked MOE weight tensor with shape (E, N, K), where E is + the number of experts, K is the input feature dimension, and N is + the output feature dimension. + - C: The output cache tensor with shape (M, topk, N), where M is the + total number of tokens post padding, topk is the number of times + each token is repeated, and N is the output feature dimension. + - sorted_token_ids: A tensor containing the sorted indices of tokens, + repeated topk times and arranged by the expert index they are + assigned to. + - expert_ids: A tensor containing the indices of the expert for each + block. It determines which expert matrix from B should be used for + each block in A. + This kernel performs the multiplication of a token by its corresponding + expert matrix as determined by `expert_ids`. The sorting of + `sorted_token_ids` by expert index and padding ensures divisibility by + BLOCK_SIZE_M, which is necessary to maintain consistency in block matrix + multiplication across different blocks processed by the same expert. """ # ----------------------------------------------------------- # Map program ids `pid` to the block of C it should compute. @@ -105,7 +119,8 @@ def fused_moe_kernel( accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)): - # Load the next block of A and B, generate a mask by checking the K dimension. + # Load the next block of A and B, generate a mask by checking the + # K dimension. a = tl.load(a_ptrs, mask=token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K), @@ -139,30 +154,41 @@ def moe_align_block_size( topk_ids: torch.Tensor, block_size: int, num_experts: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ - Aligns the token distribution across experts to be compatible with block size for matrix multiplication. + Aligns the token distribution across experts to be compatible with block + size for matrix multiplication. Parameters: - - topk_ids: A tensor of shape [total_tokens, top_k] representing the top-k expert indices for each token. + - topk_ids: A tensor of shape [total_tokens, top_k] representing the + top-k expert indices for each token. - block_size: The block size used in block matrix multiplication. - num_experts: The total number of experts. Returns: - - sorted_token_ids: A tensor containing the sorted token indices according to their allocated expert. + - sorted_token_ids: A tensor containing the sorted token indices according + to their allocated expert. - expert_ids: A tensor indicating the assigned expert index for each block. - - num_tokens_post_padded: The total number of tokens after padding, ensuring divisibility by block_size. + - num_tokens_post_padded: The total number of tokens after padding, + ensuring divisibility by block_size. - This function pads the number of tokens that each expert needs to process so that it is divisible by block_size. - Padding ensures that during block matrix multiplication, the dimensions align correctly. + This function pads the number of tokens that each expert needs to process + so that it is divisible by block_size. + Padding ensures that during block matrix multiplication, the dimensions + align correctly. 
Example: - Given topk_ids = [[2, 3, 4], [1, 2, 4], [1, 3, 4], [1, 2, 3]], block_size = 4, and num_experts = 4: - - We initially have 12 tokens (after repeating 'top_k' times) and 4 experts, with each expert needing to process 3 tokens. + Given topk_ids = [[2, 3, 4], [1, 2, 4], [1, 3, 4], [1, 2, 3]], + block_size = 4, and num_experts = 4: + - We initially have 12 tokens (after repeating 'top_k' times) and 4 experts, + with each expert needing to process 3 tokens. - As block_size is 4, we pad 1 token for each expert. - First, flatten topk_ids to [2, 3, 4, 1, 2, 4, 1, 3, 4, 1, 2, 3]. - Then append padding tokens [12, 12, 12, 12] for each block. - - After sorting by expert index, we obtain token_ids [3, 6, 9, 12, 0, 4, 10, 12, 1, 7, 11, 12, 2, 5, 8, 12]. - Tokens 12 are non-existent (padding) and are ignored in the subsequent matrix multiplication. - - The padding ensures that the total number of tokens is now divisible by block_size for proper block matrix operations. + - After sorting by expert index, we obtain token_ids + [3, 6, 9, 12, 0, 4, 10, 12, 1, 7, 11, 12, 2, 5, 8, 12]. + Tokens 12 are non-existent (padding) and are ignored in + the subsequent matrix multiplication. + - The padding ensures that the total number of tokens is now divisible + by block_size for proper block matrix operations. """ sorted_ids = torch.empty( (topk_ids.numel() + num_experts * (block_size - 1), ), @@ -224,13 +250,14 @@ def get_moe_configs(E: int, N: int) -> Optional[Dict[int, Any]]: """ Return optimized configurations for the fused MoE kernel. - The return value will be a dictionary that maps an irregular grid of batch sizes - to configurations of the fused_moe kernel. To evaluate the kernel on a given batch - size bs, the closest batch size in the grid should be picked and the associated - configuration chosen to invoke the kernel. + The return value will be a dictionary that maps an irregular grid of + batch sizes to configurations of the fused_moe kernel. To evaluate the + kernel on a given batch size bs, the closest batch size in the grid should + be picked and the associated configuration chosen to invoke the kernel. """ - # First look up if an optimized configuration is available in the configs directory + # First look up if an optimized configuration is available in the configs + # directory device_name = torch.cuda.get_device_name().replace(" ", "_") config_file_path = os.path.join( @@ -243,7 +270,8 @@ def get_moe_configs(E: int, N: int) -> Optional[Dict[int, Any]]: # If a configuration has been found, return it return {int(key): val for key, val in json.load(f).items()} - # If no optimized configuration is available, we will use the default configuration + # If no optimized configuration is available, we will use the default + # configuration return None @@ -258,18 +286,22 @@ def fused_moe( override_config: Optional[Dict[str, Any]] = None, ) -> torch.Tensor: """ - This function computes a Mixture of Experts (MoE) layer using two sets of weights, w1 and w2, and top-k gating mechanism. - + This function computes a Mixture of Experts (MoE) layer using two sets of + weights, w1 and w2, and top-k gating mechanism. + Parameters: - hidden_states (torch.Tensor): The input tensor to the MoE layer. - w1 (torch.Tensor): The first set of expert weights. - w2 (torch.Tensor): The second set of expert weights. - - gating_output (torch.Tensor): The output of the gating operation (before softmax). + - gating_output (torch.Tensor): The output of the gating operation + (before softmax). 
- topk (int): The number of top-k experts to select. - renormalize (bool): If True, renormalize the top-k weights to sum to 1. - - inplace (bool): If True, perform the operation in-place. Defaults to False. - - override_config (Optional[Dict[str, Any]]): Optional override for the kernel configuration. - + - inplace (bool): If True, perform the operation in-place. + Defaults to False. + - override_config (Optional[Dict[str, Any]]): Optional override + for the kernel configuration. + Returns: - torch.Tensor: The output tensor after applying the MoE layer. """ @@ -325,7 +357,8 @@ def fused_moe( configs = get_moe_configs(E, w2.shape[2]) if configs: - # If an optimal configuration map has been found, look up the optimal config + # If an optimal configuration map has been found, look up the + # optimal config config = configs[min(configs.keys(), key=lambda x: abs(x - M))] else: # Else use the default config diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index b2396a1d6f141..60f6fc83b200f 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -285,7 +285,8 @@ def weight_loader(self, shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to account for the tiling. + # If marlin, we need to adjust the offset and size to + # account for the tiling. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -307,7 +308,8 @@ def weight_loader(self, shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to account for the tiling. + # If marlin, we need to adjust the offset and size to + # account for the tiling. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -413,7 +415,8 @@ def weight_loader(self, shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to account for the tiling. + # If marlin, we need to adjust the offset and size to + # account for the tiling. shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) @@ -442,7 +445,8 @@ def weight_loader(self, shard_size = shard_size // param.pack_factor shard_offset = shard_offset // param.pack_factor - # If marlin, we need to adjust the offset and size to account for the tiling. + # If marlin, we need to adjust the offset and size to + # account for the tiling. 
shard_size, shard_offset = adjust_marlin_shard( param, shard_size, shard_offset) diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index dc54641878c64..af27b1844cea4 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -1,6 +1,7 @@ from typing import Type -from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.layers.quantization.gptq import GPTQConfig from vllm.model_executor.layers.quantization.squeezellm import SqueezeLLMConfig diff --git a/vllm/model_executor/layers/quantization/awq.py b/vllm/model_executor/layers/quantization/awq.py index 3e1c814dd233c..2caef5f1ebf50 100644 --- a/vllm/model_executor/layers/quantization/awq.py +++ b/vllm/model_executor/layers/quantization/awq.py @@ -6,7 +6,8 @@ from vllm._C import ops from vllm.model_executor.layers.linear import (LinearMethodBase, set_weight_attrs) -from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) class AWQConfig(QuantizationConfig): @@ -50,7 +51,8 @@ def get_min_capability(self) -> int: def get_config_filenames() -> List[str]: return [ "quant_config.json", # E.g., casperhansen/vicuna-7b-v1.5-awq - "quantize_config.json", # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq + # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq + "quantize_config.json", ] @classmethod diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py index 2e6aabb232673..bb69c7235a133 100644 --- a/vllm/model_executor/layers/quantization/gptq.py +++ b/vllm/model_executor/layers/quantization/gptq.py @@ -31,8 +31,8 @@ def __init__( self.pack_factor = Fraction(32, self.weight_bits) if self.weight_bits not in [2, 3, 4, 8]: raise ValueError( - "Currently, only 2/3/4/8-bit weight quantization is supported for " - f"GPTQ, but got {self.weight_bits} bits.") + "Currently, only 2/3/4/8-bit weight quantization is " + f"supported for GPTQ, but got {self.weight_bits} bits.") def __repr__(self) -> str: return (f"GPTQConfig(weight_bits={self.weight_bits}, " @@ -101,7 +101,8 @@ def create_weights( "The input size is not aligned with the quantized " "weight shape. This can be caused by too large " "tensor parallel size.") - if output_size_per_partition % self.quant_config.pack_factor.numerator != 0: + if (output_size_per_partition % self.quant_config.pack_factor.numerator + != 0): raise ValueError( "The output size is not aligned with the quantized " "weight shape. 
This can be caused by too large " @@ -114,7 +115,8 @@ def create_weights( exllama_state = ExllamaState.UNINITIALIZED scale_and_zero_size = input_size // group_size scale_and_zero_input_dim = None - if input_size != input_size_per_partition and self.quant_config.group_size != -1: + if (input_size != input_size_per_partition + and self.quant_config.group_size != -1): # For act-order models, we cannot use Exllama for row parallel layer if self.quant_config.desc_act: exllama_state = ExllamaState.UNUSED diff --git a/vllm/model_executor/layers/quantization/marlin.py b/vllm/model_executor/layers/quantization/marlin.py index 7566d78a8aba4..0c4f20d9e3a58 100644 --- a/vllm/model_executor/layers/quantization/marlin.py +++ b/vllm/model_executor/layers/quantization/marlin.py @@ -5,7 +5,8 @@ from vllm._C import ops from vllm.model_executor.layers.linear import LinearMethodBase, set_weight_attrs -from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) class MarlinConfig(QuantizationConfig): @@ -22,8 +23,9 @@ def __init__( self.group_size = group_size if self.group_size != 128 and self.group_size != -1: raise ValueError( - "Currently, only group size 128 and -1 (channelwise) is supported for " - f"Marlin, but got group_size of {self.group_size}") + "Currently, only group size 128 and -1 (channelwise) " + "is supported for Marlin, but got group_size of " + f"{self.group_size}") # 4 Bits packed into 32 bit datatype. self.pack_factor = 32 // 4 @@ -37,7 +39,8 @@ def __init__( # Min in_features dim self.min_k_threads = 128 - # Max parallel problems to solve at once (improves large batch performance) + # Max parallel problems to solve at once (improves large + # batch performance) self.max_parallel = 16 # Permutation length used by the marlin kernels. @@ -102,22 +105,26 @@ def create_weights( # Validate output_size_per_partition if output_size_per_partition % self.quant_config.min_n_threads != 0: raise ValueError( - f"Weight output_size_per_partition = {output_size_per_partition} is not divisible by min_n_threads = {self.quant_config.min_n_threads}." - ) + f"Weight output_size_per_partition = " + f"{output_size_per_partition} is not divisible by " + f"min_n_threads = {self.quant_config.min_n_threads}.") if output_size_per_partition % self.quant_config.pack_factor != 0: raise ValueError( - f"Weight output_size_per_partition = {output_size_per_partition} is not divisible by pack_factor = {self.quant_config.pack_factor}." - ) + f"Weight output_size_per_partition = " + f"{output_size_per_partition} is not divisible by " + f"pack_factor = {self.quant_config.pack_factor}.") # Validate input_size_per_partition if input_size_per_partition % self.quant_config.min_k_threads != 0: raise ValueError( - f"Weight input_size_per_partition = {input_size_per_partition} is not divisible by min_k_threads = {self.quant_config.min_k_threads}." - ) - if self.quant_config.group_size != -1 and input_size_per_partition % self.quant_config.group_size != 0: - raise ValueError( - f"Weight input_size_per_partition = f{input_size_per_partition} is not divisible by group_size = {self.quant_config.group_size}." 
- ) + f"Weight input_size_per_partition = " + f"{input_size_per_partition} is not divisible by " + f"min_k_threads = {self.quant_config.min_k_threads}.") + if (self.quant_config.group_size != -1 and + input_size_per_partition % self.quant_config.group_size != 0): + raise ValueError(f"Weight input_size_per_partition = " + f"{input_size_per_partition} is not divisible by " + f"group_size = {self.quant_config.group_size}.") # Check that we have at least 4 tiles horizontally in the shard num_tiles_per_perm = self.quant_config.perm_len // ( @@ -149,7 +156,9 @@ def create_weights( ) # Determine if channelwise or not - input_groups = 1 if self.quant_config.group_size == -1 else input_size_per_partition // self.quant_config.group_size + input_groups = (1 if self.quant_config.group_size == -1 else + input_size_per_partition // + self.quant_config.group_size) scales = Parameter( torch.empty( diff --git a/vllm/model_executor/layers/quantization/squeezellm.py b/vllm/model_executor/layers/quantization/squeezellm.py index 9244e88552756..ed25455e6ec1f 100644 --- a/vllm/model_executor/layers/quantization/squeezellm.py +++ b/vllm/model_executor/layers/quantization/squeezellm.py @@ -6,7 +6,8 @@ from vllm._C import ops from vllm.model_executor.layers.linear import (LinearMethodBase, set_weight_attrs) -from vllm.model_executor.layers.quantization.base_config import QuantizationConfig +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) from vllm.utils import is_hip diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 19e7f630c4620..4377b845df628 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -6,7 +6,8 @@ from vllm.model_executor.parallel_utils.communication_op import ( tensor_model_parallel_gather) -from vllm.model_executor.sampling_metadata import SamplingMetadata, SamplingTensors +from vllm.model_executor.sampling_metadata import (SamplingMetadata, + SamplingTensors) from vllm.sampling_params import SamplingParams, SamplingType from vllm.sequence import (Logprob, PromptLogprobs, SampleLogprobs, SamplerOutput, SequenceData, SequenceGroupOutput, diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index 6da0082b94285..cbf472750e294 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -333,7 +333,8 @@ def load_weights(self, if "rotary_emb.inv_freq" in name: continue if name == "lm_head.weight": - # Unlike Baichuan, Baichuan2 normalizes the head weights. Refer to: + # Unlike Baichuan, Baichuan2 normalizes the head weights. + # Refer to: # https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat/blob/84603cde5ebffb6084e476cfaeceaf0b8b91fe54/modeling_baichuan.py#L508 # Distinguish between Baichuan and Baichuan2 by checking the # vocab size. 
This is suggested by diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index f2dca3df27cfb..13c080cb02774 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -119,7 +119,8 @@ def __init__( linear_method=None) if config.n_shared_experts is not None: - intermediate_size = config.moe_intermediate_size * config.n_shared_experts + intermediate_size = (config.moe_intermediate_size * + config.n_shared_experts) self.shared_experts = DeepseekMLP( hidden_size=config.hidden_size, intermediate_size=intermediate_size, @@ -273,8 +274,9 @@ def __init__( max_position_embeddings=max_position_embeddings, linear_method=linear_method, ) - if (config.n_routed_experts is not None and \ - layer_idx >= config.first_k_dense_replace and layer_idx % config.moe_layer_freq == 0): + if (config.n_routed_experts is not None + and layer_idx >= config.first_k_dense_replace + and layer_idx % config.moe_layer_freq == 0): self.mlp = DeepseekMoE(config=config, linear_method=linear_method) else: self.mlp = DeepseekMLP( diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index b8c6822e9825e..93dce7b67a7a5 100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -143,7 +143,8 @@ def __init__( linear_method: Optional[LinearMethodBase] = None, ): super().__init__() - inner_dim = 4 * config.n_embd if config.n_inner is None else config.n_inner + inner_dim = (4 * config.n_embd + if config.n_inner is None else config.n_inner) self.ln_1 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon) self.attn = GPTJAttention(config, linear_method) self.mlp = GPTJMLP(inner_dim, config, linear_method) diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index 0ae0a85643456..7b2215ef4bda5 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -305,7 +305,8 @@ def load_weights(self, param = params_dict[name] if "wqkv" in name: config = self.config - kv_groups = config.num_attention_heads // config.num_key_value_heads + kv_groups = (config.num_attention_heads // + config.num_key_value_heads) head_dim = config.hidden_size // config.num_attention_heads loaded_weight = loaded_weight.view(-1, 2 + kv_groups, head_dim, diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index fa7a6d850051e..2b0a420e82faf 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -52,7 +52,8 @@ ) from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding) from vllm.model_executor.parallel_utils.parallel_state import ( get_tensor_model_parallel_world_size, ) from vllm.model_executor.sampling_metadata import SamplingMetadata @@ -81,7 +82,8 @@ def output_multiplier(self) -> float: class OlmoAttention(nn.Module): """ - This is the attention block where the output is computed as ``Attention(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))`` + This is the attention block where the output is computed as + ``Attention(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))`` (plus another skip connection). 
""" @@ -94,11 +96,12 @@ def __init__( self.config = config self.hidden_size = config.d_model assert config.d_model % config.n_heads == 0 - tensor_model_parallel_world_size = get_tensor_model_parallel_world_size( - ) + tensor_model_parallel_world_size = ( + get_tensor_model_parallel_world_size()) self.total_num_heads = self.config.n_heads assert self.total_num_heads % tensor_model_parallel_world_size == 0 - self.num_heads = self.total_num_heads // tensor_model_parallel_world_size + self.num_heads = (self.total_num_heads // + tensor_model_parallel_world_size) self.head_dim = self.hidden_size // self.total_num_heads # Layer norms. @@ -158,7 +161,8 @@ def forward( class OlmoMLP(nn.Module): """ - This is the MLP block where the output is computed as ``MLP(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))`` + This is the MLP block where the output is computed as + ``MLP(LN(x))`` in ``MLP(LN(x + Attention(LN(x))))`` (plus another skip connection). """ @@ -217,7 +221,8 @@ def forward( class OlmoBlock(nn.Module): """ - This is a typical transformer block where the output is computed as ``MLP(LN(x + Attention(LN(x))))`` + This is a typical transformer block where the output is + computed as ``MLP(LN(x + Attention(LN(x))))`` (plus another skip connection). """ diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 4dd63f923e5f2..3e4f843e649b4 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -170,7 +170,8 @@ def __init__( self.hidden_size = config.hidden_size # Requires transformers > 4.32.0 rope_theta = getattr(config, "rope_theta", 1000000) - use_sliding_window = config.use_sliding_window and layer_idx < config.max_window_layers + use_sliding_window = (config.use_sliding_window + and layer_idx < config.max_window_layers) self.self_attn = Qwen2Attention( hidden_size=self.hidden_size, num_heads=config.num_attention_heads, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index d1a547f815616..c66f327beee7a 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -1,5 +1,6 @@ # coding=utf-8 -# Copyright 2023 Stability AI, EleutherAI, and The HuggingFace Inc. team. All rights reserved. +# Copyright 2023 Stability AI, EleutherAI, and The HuggingFace Inc. team. +# All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,7 +17,8 @@ # This code is based off the following work: # https://huggingface.co/stabilityai/stablelm-3b-4e1t/blob/main/modeling_stablelm_epoch.py # https://huggingface.co/stabilityai/stablelm-3b-4e1t/blob/main/config.json -"""Inference-only StabeLM (https://github.com/Stability-AI/StableLM) model compatible with HuggingFace weights.""" +"""Inference-only StabeLM (https://github.com/Stability-AI/StableLM) +model compatible with HuggingFace weights.""" from typing import List, Optional, Tuple import torch @@ -102,9 +104,9 @@ def __init__(self, self.kv_size = self.num_key_value_heads * self.head_dim self.qkv_bias = getattr(config, "use_qkv_bias", False) if (self.head_dim * self.num_heads * tp_size) != self.hidden_size: - raise ValueError( - f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}" - f" and `num_heads`: {self.num_heads}).") + raise ValueError(f"hidden_size must be divisible by num_heads " + f"(got `hidden_size`: {self.hidden_size}" + f" and `num_heads`: {self.num_heads}).") self.qkv_proj = QKVParallelLinear(self.hidden_size, self.head_dim, @@ -192,7 +194,6 @@ def __init__(self, config: PretrainedConfig, linear_method: Optional[LinearMethodBase] = None) -> None: super().__init__() - # self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, config.pad_token_id) self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index efa235233372f..cfbb1bdb7909e 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -35,7 +35,8 @@ from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) -from vllm.model_executor.parallel_utils.parallel_state import get_tensor_model_parallel_world_size +from vllm.model_executor.parallel_utils.parallel_state import ( + get_tensor_model_parallel_world_size) from vllm.model_executor.weight_utils import (default_weight_loader, hf_model_weights_iterator) from vllm.sequence import SamplerOutput diff --git a/vllm/model_executor/neuron_model_loader.py b/vllm/model_executor/neuron_model_loader.py index b8d63d4ff12fc..c434b270a5562 100644 --- a/vllm/model_executor/neuron_model_loader.py +++ b/vllm/model_executor/neuron_model_loader.py @@ -34,7 +34,8 @@ def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]: def get_model(model_config: ModelConfig, device_config: DeviceConfig, **kwargs) -> nn.Module: - from transformers_neuronx.config import NeuronConfig, ContinuousBatchingConfig + from transformers_neuronx.config import (NeuronConfig, + ContinuousBatchingConfig) parallel_config = kwargs.get("parallel_config") scheduler_config = kwargs.get("scheduler_config") diff --git a/vllm/model_executor/parallel_utils/communication_op.py b/vllm/model_executor/parallel_utils/communication_op.py index cf805df892fdc..521b6b8a383b0 100644 --- a/vllm/model_executor/parallel_utils/communication_op.py +++ b/vllm/model_executor/parallel_utils/communication_op.py @@ -11,7 +11,8 @@ get_tensor_model_parallel_group, is_cupy_nccl_enabled_for_all_reduce, ) -from vllm.model_executor.parallel_utils.custom_all_reduce import custom_all_reduce +from vllm.model_executor.parallel_utils.custom_all_reduce import ( + custom_all_reduce) def tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor: @@ -24,7 +25,7 @@ def 
tensor_model_parallel_all_reduce(input_: torch.Tensor) -> torch.Tensor: and GPU topology. TLDR: always assume this function modifies its input, but use the return - value as the output. + value as the output. """ # Bypass the function if we are using only 1 GPU. if get_tensor_model_parallel_world_size() == 1: diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 7deb80801856e..b23f0170a6ca5 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -114,7 +114,8 @@ def from_sampling_metadata( do_penalties = True if (i < sampling_metadata.num_prompts and sampling_params.prompt_logprobs is not None): - # For tokens in the prompt that we only need to get their logprobs + # For tokens in the prompt that we only need to get + # their logprobs prompt_len = sampling_metadata.prompt_lens[i] temperatures += [temperature] * (prompt_len - 1) top_ps += [top_p] * (prompt_len - 1) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 8103f3c2b24bf..4aa158878fb96 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -74,8 +74,8 @@ class SamplingParams: stop_token_ids: List of tokens that stop the generation when they are generated. The returned output will contain the stop tokens unless the stop tokens are special tokens. - include_stop_str_in_output: Whether to include the stop strings in output - text. Defaults to False. + include_stop_str_in_output: Whether to include the stop strings in + output text. Defaults to False. ignore_eos: Whether to ignore the EOS token and continue generating tokens after the EOS token is generated. max_tokens: Maximum number of tokens to generate per output sequence. diff --git a/vllm/sequence.py b/vllm/sequence.py index 37c102407a5f2..4a002edaf580f 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -351,7 +351,8 @@ def maybe_set_first_token_time(self, time: float) -> None: self.metrics.first_token_time = time def maybe_set_first_scheduled_time(self, time: float) -> None: - """Sets the first scheduled time and time in queue for Request level timings.""" + """Sets the first scheduled time and time in queue for Request + level timings.""" if self.metrics.first_scheduled_time is None: self.metrics.first_scheduled_time = time self.metrics.time_in_queue = time - self.metrics.arrival_time diff --git a/vllm/spec_decode/batch_expansion.py b/vllm/spec_decode/batch_expansion.py index 478c950f52873..0f698fa346010 100644 --- a/vllm/spec_decode/batch_expansion.py +++ b/vllm/spec_decode/batch_expansion.py @@ -5,8 +5,12 @@ from vllm.sequence import (SamplerOutput, SequenceGroupMetadata, SequenceData) from vllm.worker.worker import Worker -from vllm.spec_decode.util import nvtx_range, sampler_output_to_torch, get_all_seq_ids, split_batch_by_proposal_len -from vllm.spec_decode.interfaces import SpeculativeScorer, SpeculativeProposals, SpeculativeScores +from vllm.spec_decode.util import (nvtx_range, sampler_output_to_torch, + get_all_seq_ids, + split_batch_by_proposal_len) +from vllm.spec_decode.interfaces import (SpeculativeScorer, + SpeculativeProposals, + SpeculativeScores) SeqId = int TargetSeqId = int @@ -68,11 +72,12 @@ def score_proposals( proposal_lens_list = proposals.proposal_lens.tolist() proposal_token_ids_list = proposals.proposal_token_ids.tolist() - spec_indices, non_spec_indices, target_seq_group_metadata_list, num_scoring_tokens = self._expand_batch( - seq_group_metadata_list=seq_group_metadata_list, - 
proposal_token_ids_list=proposal_token_ids_list, - proposal_lens_list=proposal_lens_list, - ) + (spec_indices, non_spec_indices, target_seq_group_metadata_list, + num_scoring_tokens) = self._expand_batch( + seq_group_metadata_list=seq_group_metadata_list, + proposal_token_ids_list=proposal_token_ids_list, + proposal_lens_list=proposal_lens_list, + ) target_sampler_output = self._scorer_worker.execute_model( seq_group_metadata_list=target_seq_group_metadata_list, @@ -125,7 +130,8 @@ def _expand_batch( num_scoring_tokens = len(target_seq_group_metadata_list) target_seq_group_metadata_list.extend(non_spec_seqs) - return spec_indices, non_spec_indices, target_seq_group_metadata_list, num_scoring_tokens + return (spec_indices, non_spec_indices, target_seq_group_metadata_list, + num_scoring_tokens) def _contract_batch(self, original_bs: int, target_sampler_output: List[SamplerOutput], @@ -306,10 +312,11 @@ def _split_scoring_output( # Convert non-speculative output tokens to tensors. sampler_output.sampled_token_probs = non_spec_probs sampler_output.sampled_token_ids = non_spec_sampled_tokens - non_spec_target_token_ids, non_spec_target_probs = sampler_output_to_torch( - [sampler_output]) + non_spec_target_token_ids, non_spec_target_probs = ( + sampler_output_to_torch([sampler_output])) - return target_token_ids, target_probs, non_spec_target_token_ids, non_spec_target_probs + return (target_token_ids, target_probs, non_spec_target_token_ids, + non_spec_target_probs) def _create_target_seq_id_iterator( self, seq_ids: List[SeqId]) -> Iterator[TargetSeqId]: diff --git a/vllm/spec_decode/multi_step_worker.py b/vllm/spec_decode/multi_step_worker.py index f7be14d3d22c2..0915c275b0408 100644 --- a/vllm/spec_decode/multi_step_worker.py +++ b/vllm/spec_decode/multi_step_worker.py @@ -5,7 +5,8 @@ from vllm.sequence import SamplerOutput, SequenceGroupMetadata from vllm.worker.worker import Worker -from vllm.spec_decode.interfaces import SpeculativeProposals, SpeculativeProposer +from vllm.spec_decode.interfaces import (SpeculativeProposals, + SpeculativeProposer) from vllm.spec_decode.util import sampler_output_to_torch @@ -247,8 +248,9 @@ def get_proposals( """ # Split speculative- and non-speculative- sequences. 
- proposal_lens, nonzero_proposal_len_seqs, nonzero_proposal_len_indices = self._split_by_max_model_len( - seq_group_metadata_list, max_proposal_len) + (proposal_lens, nonzero_proposal_len_seqs, + nonzero_proposal_len_indices) = self._split_by_max_model_len( + seq_group_metadata_list, max_proposal_len) if nonzero_proposal_len_seqs: # Speculate tokens using the draft worker for the speculative @@ -306,7 +308,8 @@ def _split_by_max_model_len( else: proposal_lens.append(0) - return proposal_lens, nonzero_proposal_len_seqs, nonzero_proposal_len_indices + return (proposal_lens, nonzero_proposal_len_seqs, + nonzero_proposal_len_indices) def _merge_outputs( self, @@ -356,7 +359,8 @@ def _merge_outputs( device=self._device) entire_proposal_probs[nonzero_proposal_len_indices] = proposal_probs - proposal_tokens, proposal_probs = entire_proposal_tokens, entire_proposal_probs + proposal_tokens, proposal_probs = (entire_proposal_tokens, + entire_proposal_probs) proposal_lens = torch.zeros(batch_size, dtype=torch.long, diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index 890e479202372..1e56741347008 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -10,7 +10,8 @@ from vllm.spec_decode.multi_step_worker import MultiStepWorker from vllm.model_executor.layers.rejection_sampler import RejectionSampler from vllm.config import CacheConfig -from vllm.spec_decode.util import nvtx_range, get_all_seq_ids, split_batch_by_proposal_len +from vllm.spec_decode.util import (nvtx_range, get_all_seq_ids, + split_batch_by_proposal_len) from vllm.spec_decode.interfaces import SpeculativeProposals, SpeculativeScores from vllm.spec_decode.batch_expansion import BatchExpansionTop1Scorer from vllm.spec_decode.interfaces import SpeculativeScorer @@ -25,7 +26,7 @@ class SpecDecodeWorker: LLM, after which some verification routine determines which (if any) of the speculative tokens are accepted by the larger LLM. - See https://github.com/vllm-project/vllm/pull/2188 and + See https://github.com/vllm-project/vllm/pull/2188 and https://github.com/vllm-project/vllm/pull/3103 for more info. 
The current implementation has the following limitations: @@ -109,10 +110,12 @@ def profile_num_available_blocks(self, block_size: int, block_size, gpu_memory_utilization, cpu_swap_space, cache_dtype)) - scorer_cache_block_size_bytes = self.scorer_worker.get_cache_block_size_bytes( - block_size, cache_dtype) - proposer_cache_block_size_bytes = self.proposer_worker.get_cache_block_size_bytes( - block_size, cache_dtype) + scorer_cache_block_size_bytes = ( + self.scorer_worker.get_cache_block_size_bytes( + block_size, cache_dtype)) + proposer_cache_block_size_bytes = ( + self.proposer_worker.get_cache_block_size_bytes( + block_size, cache_dtype)) new_num_gpu_blocks = split_num_cache_blocks_evenly( scorer_cache_block_size_bytes, proposer_cache_block_size_bytes, @@ -320,8 +323,8 @@ def _create_output_sampler_list( sampler_output_list.append( SamplerOutput(outputs=step_output_token_ids)) - maybe_rejsample_metrics = self._metrics.maybe_collect_rejsample_metrics( - k) + maybe_rejsample_metrics = ( + self._metrics.maybe_collect_rejsample_metrics(k)) if maybe_rejsample_metrics is not None: sampler_output_list[ 0].spec_decode_worker_metrics = maybe_rejsample_metrics diff --git a/vllm/transformers_utils/configs/mpt.py b/vllm/transformers_utils/configs/mpt.py index 5ea0d9122ef11..2c0e45623aa25 100644 --- a/vllm/transformers_utils/configs/mpt.py +++ b/vllm/transformers_utils/configs/mpt.py @@ -62,62 +62,6 @@ def __init__(self, fc_type: str = 'torch', verbose: Optional[int] = None, **kwargs: Any): - """The MPT configuration class. - Args: - d_model (int): The size of the embedding dimension of the model. - n_heads (int): The number of attention heads. - n_layers (int): The number of layers in the model. - expansion_ratio (int): The ratio of the up/down scale in the ffn. - max_seq_len (int): The maximum sequence length of the model. - vocab_size (int): The size of the vocabulary. - resid_pdrop (float): The dropout probability applied to the attention output before combining with residual. - emb_pdrop (float): The dropout probability for the embedding layer. - learned_pos_emb (bool): Whether to use learned positional embeddings - attn_config (Dict): A dictionary used to configure the model's attention module: - attn_type (str): type of attention to use. Options: multihead_attention, multiquery_attention, grouped_query_attention - attn_pdrop (float): The dropout probability for the attention layers. - attn_impl (str): The attention implementation to use. One of 'torch', 'flash', or 'triton'. - qk_ln (bool): Whether to apply layer normalization to the queries and keys in the attention layer. - clip_qkv (Optional[float]): If not None, clip the queries, keys, and values in the attention layer to - this value. - softmax_scale (Optional[float]): If not None, scale the softmax in the attention layer by this value. If None, - use the default scale of ``1/sqrt(d_keys)``. - prefix_lm (Optional[bool]): Whether the model should operate as a Prefix LM. This requires passing an - extra `prefix_mask` argument which indicates which tokens belong to the prefix. Tokens in the prefix - can attend to one another bi-directionally. Tokens outside the prefix use causal attention. - attn_uses_sequence_id (Optional[bool]): Whether to restrict attention to tokens that have the same sequence_id. - When the model is in `train` mode, this requires passing an extra `sequence_id` argument which indicates - which sub-sequence each token belongs to. - Defaults to ``False`` meaning any provided `sequence_id` will be ignored. 
- alibi (bool): Whether to use the alibi bias instead of position embeddings. - alibi_bias_max (int): The maximum value of the alibi bias. - kv_n_heads (Optional[int]): For grouped_query_attention only, allow user to specify number of kv heads. - ffn_config (Dict): A dictionary used to configure the model's ffn module: - ffn_type (str): type of ffn to use. Options: mptmlp, te_ln_mlp - init_device (str): The device to use for parameter initialization. - logit_scale (Optional[Union[float, str]]): If not None, scale the logits by this value. - no_bias (bool): Whether to use bias in all layers. - verbose (int): The verbosity level. 0 is silent. - embedding_fraction (float): The fraction to scale the gradients of the embedding layer by. - norm_type (str): choose type of norm to use - use_cache (bool): Whether or not the model should return the last key/values attentions - init_config (Dict): A dictionary used to configure the model initialization: - init_config.name: The parameter initialization scheme to use. Options: 'default_', 'baseline_', - 'kaiming_uniform_', 'kaiming_normal_', 'neox_init_', 'small_init_', 'xavier_uniform_', or - 'xavier_normal_'. These mimic the parameter initialization methods in PyTorch. - init_div_is_residual (Union[int, float, str, bool]): Value to divide initial weights by if ``module._is_residual`` is True. - emb_init_std (Optional[float]): The standard deviation of the normal distribution used to initialize the embedding layer. - emb_init_uniform_lim (Optional[Union[Tuple[float, float], float]]): The lower and upper limits of the uniform distribution - used to initialize the embedding layer. Mutually exclusive with ``emb_init_std``. - init_std (float): The standard deviation of the normal distribution used to initialize the model, - if using the baseline_ parameter initialization scheme. - init_gain (float): The gain to use for parameter initialization with kaiming or xavier initialization schemes. - fan_mode (str): The fan mode to use for parameter initialization with kaiming initialization schemes. - init_nonlinearity (str): The nonlinearity to use for parameter initialization with kaiming initialization schemes. - --- - See llmfoundry.models.utils.param_init_fns.py for info on other param init config options - fc_type (str): choose fc layer implementation. Options: torch and te. te layers support fp8 when using H100 GPUs. - """ self.d_model = d_model self.n_heads = n_heads self.n_layers = n_layers @@ -139,8 +83,8 @@ def __init__(self, self.fc_type = fc_type if verbose is not None: warnings.warn(DeprecationWarning( - 'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.' - ), + 'verbose argument for MPTConfig is now ignored and ' + 'will be removed. 
Use python_log_level instead.'), stacklevel=2) if 'name' in kwargs: del kwargs['name'] @@ -149,7 +93,8 @@ def __init__(self, if self.attn_config.get('alibi', False): self.learned_pos_emb = False warnings.warn( - f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`', + f'alibi is turned on, setting `learned_pos_emb` ' + f'to {self.learned_pos_emb}`', stacklevel=2) super().__init__(**kwargs) self._validate_config() @@ -176,8 +121,8 @@ def _validate_config(self) -> None: [self.attn_config['attn_pdrop'], self.resid_pdrop, self.emb_pdrop] )): raise ValueError( - "self.attn_config['attn_pdrop'], resid_pdrop, emb_pdrop are probabilities and must be between 0 and 1" # pylint: disable=line-too-long - ) + "self.attn_config['attn_pdrop'], resid_pdrop, emb_pdrop are " + "probabilities and must be between 0 and 1") if self.attn_config['attn_impl'] not in ['torch', 'flash', 'triton']: raise ValueError( f"Unknown attn_impl={self.attn_config['attn_impl']}") @@ -193,17 +138,17 @@ def _validate_config(self) -> None: if self.attn_config['attn_uses_sequence_id'] and self.attn_config[ 'attn_impl'] not in ['torch', 'triton']: raise NotImplementedError( - 'attn_uses_sequence_id only implemented with torch and triton attention.' # pylint: disable=line-too-long - ) + 'attn_uses_sequence_id only implemented with torch ' + 'and triton attention.') if self.embedding_fraction > 1 or self.embedding_fraction <= 0: raise ValueError( - 'model.embedding_fraction must be between 0 (exclusive) and 1 (inclusive)!' # pylint: disable=line-too-long - ) + 'model.embedding_fraction must be between 0 (exclusive) ' + 'and 1 (inclusive)!') if isinstance(self.logit_scale, str) and self.logit_scale != 'inv_sqrt_d_model': raise ValueError( - f"self.logit_scale={self.logit_scale!r} is not recognized as an option; use numeric value or 'inv_sqrt_d_model'." # pylint: disable=line-too-long - ) + f"self.logit_scale={self.logit_scale!r} is not recognized as " + "an option; use numeric value or 'inv_sqrt_d_model'.") if self.init_config.get('name', None) is None: raise ValueError( f"self.init_config={self.init_config!r} 'name' needs to be set." @@ -219,11 +164,11 @@ def _validate_config(self) -> None: del te except Exception as exc: raise ImportError( - # pylint: disable=line-too-long - 'TransformerEngine import fail. `fc_type: te` requires TransformerEngine be installed. ' - + - 'The required version of transformer_engine also requires FlashAttention v1.0.6 is installed:\n' - + 'pip install flash-attn==1.0.6 --no-build-isolation \n' + + 'TransformerEngine import fail. `fc_type: te` requires ' + 'TransformerEngine be installed. ' + 'The required version of transformer_engine also requires ' + 'FlashAttention v1.0.6 is installed:\n' + 'pip install flash-attn==1.0.6 --no-build-isolation \n' 'pip install git+https://github.com/NVIDIA/TransformerEngine.git@144e4888b2cdd60bd52e706d5b7a79cb9c1a7156' ) from exc if self.ffn_config['ffn_type'] == 'mptmlp': diff --git a/vllm/transformers_utils/configs/starcoder2.py b/vllm/transformers_utils/configs/starcoder2.py index 4c3b6b8def074..2879cd0445275 100644 --- a/vllm/transformers_utils/configs/starcoder2.py +++ b/vllm/transformers_utils/configs/starcoder2.py @@ -2,78 +2,6 @@ class Starcoder2Config(PretrainedConfig): - r""" - This is the configuration class to store the configuration of a [`Starcoder2Model`]. It is used to instantiate a - Starcoder2 model according to the specified arguments, defining the model architecture. 
Instantiating a configuration - with the defaults will yield a similar configuration to that of the [bigcode/starcoder2-7b_16k](https://huggingface.co/bigcode/starcoder2-7b_16k) model. - - - Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the - documentation from [`PretrainedConfig`] for more information. - - - Args: - vocab_size (`int`, *optional*, defaults to 49152): - Vocabulary size of the Starcoder2 model. Defines the number of different tokens that can be represented by the - `inputs_ids` passed when calling [`Starcoder2Model`] - hidden_size (`int`, *optional*, defaults to 3072): - Dimension of the hidden representations. - intermediate_size (`int`, *optional*, defaults to 12288): - Dimension of the MLP representations. - num_hidden_layers (`int`, *optional*, defaults to 30): - Number of hidden layers in the Transformer encoder. - num_attention_heads (`int`, *optional*, defaults to 24): - Number of attention heads for each attention layer in the Transformer encoder. - num_key_value_heads (`int`, *optional*, defaults to 2): - This is the number of key_value heads that should be used to implement Grouped Query Attention. If - `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if - `num_key_value_heads=1 the model will use Multi Query Attention (MQA) otherwise GQA is used. When - converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed - by meanpooling all the original heads within that group. For more details checkout [this - paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to `8`. - hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`): - The non-linear activation function (function or string) in the decoder. - max_position_embeddings (`int`, *optional*, defaults to 4096): - The maximum sequence length that this model might ever be used with. Starcoder2's sliding window attention - allows sequence of up to 4096*32 tokens. - initializer_range (`float`, *optional*, defaults to 0.02): - The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - norm_epsilon (`float`, *optional*, defaults to 1e-05): - Epsilon value for the layer norm - use_cache (`bool`, *optional*, defaults to `True`): - Whether or not the model should return the last key/values attentions (not used by all models). Only - relevant if `config.is_decoder=True`. - bos_token_id (`int`, *optional*, defaults to 50256): - The id of the "beginning-of-sequence" token. - eos_token_id (`int`, *optional*, defaults to 50256): - The id of the "end-of-sequence" token. - rope_theta (`float`, *optional*, defaults to 10000.0): - The base period of the RoPE embeddings. - sliding_window (`int`, *optional*): - Sliding window attention window size. If not specified, will default to `None` (no sliding window). - attention_dropout (`float`, *optional*, defaults to 0.0): - The dropout ratio for the attention probabilities. - residual_dropout (`float`, *optional*, defaults to 0.0): - Residual connection dropout value. - embedding_dropout (`float`, *optional*, defaults to 0.0): - Embedding dropout. - use_bias (`bool`, *optional*, defaults to `True`): - Whether to use bias term on linear layers of the model. 
- - - ```python - >>> from transformers import Starcoder2Model, Starcoder2Config - - >>> # Initializing a Starcoder2 7B style configuration - >>> configuration = Starcoder2Config() - - >>> # Initializing a model from the Starcoder2 7B style configuration - >>> model = Starcoder2Model(configuration) - - >>> # Accessing the model configuration - >>> configuration = model.config - ```""" - model_type = "starcoder2" keys_to_ignore_at_inference = ["past_key_values"] diff --git a/vllm/transformers_utils/tokenizers/baichuan.py b/vllm/transformers_utils/tokenizers/baichuan.py index 1dd241e4a5c4b..02045bdcb2ccf 100644 --- a/vllm/transformers_utils/tokenizers/baichuan.py +++ b/vllm/transformers_utils/tokenizers/baichuan.py @@ -1,4 +1,3 @@ -# yapf: disable # Adapted from # https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat/blob/8f6e343d545c503b91429582231d1d354dac2740/tokenization_baichuan.py # This includes a fix suggested in @@ -13,7 +12,6 @@ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer from transformers.utils import logging - logger = logging.get_logger(__name__) VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"} @@ -52,27 +50,16 @@ def __init__( clean_up_tokenization_spaces=False, **kwargs, ): - self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs - bos_token = ( - AddedToken(bos_token, lstrip=False, rstrip=False) - if isinstance(bos_token, str) - else bos_token - ) - eos_token = ( - AddedToken(eos_token, lstrip=False, rstrip=False) - if isinstance(eos_token, str) - else eos_token - ) - unk_token = ( - AddedToken(unk_token, lstrip=False, rstrip=False) - if isinstance(unk_token, str) - else unk_token - ) - pad_token = ( - AddedToken(pad_token, lstrip=False, rstrip=False) - if isinstance(pad_token, str) - else pad_token - ) + self.sp_model_kwargs = ({} if sp_model_kwargs is None else + sp_model_kwargs) + bos_token = (AddedToken(bos_token, lstrip=False, rstrip=False) + if isinstance(bos_token, str) else bos_token) + eos_token = (AddedToken(eos_token, lstrip=False, rstrip=False) + if isinstance(eos_token, str) else eos_token) + unk_token = (AddedToken(unk_token, lstrip=False, rstrip=False) + if isinstance(unk_token, str) else unk_token) + pad_token = (AddedToken(pad_token, lstrip=False, rstrip=False) + if isinstance(pad_token, str) else pad_token) self.vocab_file = vocab_file self.add_bos_token = add_bos_token self.add_eos_token = add_eos_token @@ -107,7 +94,10 @@ def vocab_size(self): def get_vocab(self): """Returns vocab as a dict""" - vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)} + vocab = { + self.convert_ids_to_tokens(i): i + for i in range(self.vocab_size) + } vocab.update(self.added_tokens_encoder) return vocab @@ -130,7 +120,8 @@ def convert_tokens_to_string(self, tokens): out_string = "" prev_is_special = False for i, token in enumerate(tokens): - # make sure that special tokens are not decoded using sentencepiece model + # make sure that special tokens are not decoded using + # sentencepiece model if token in self.all_special_tokens: if not prev_is_special and i != 0: out_string += " " @@ -143,9 +134,9 @@ def convert_tokens_to_string(self, tokens): out_string += self.sp_model.decode(current_sub_tokens) return out_string - def save_vocabulary( - self, save_directory, filename_prefix: Optional[str] = None - ) -> Tuple[str]: + def save_vocabulary(self, + save_directory, + filename_prefix: Optional[str] = None) -> Tuple[str]: """ Save the vocabulary and special tokens file to a directory. 
@@ -157,24 +148,24 @@ def save_vocabulary( `Tuple(str)`: Paths to the files saved. """ if not os.path.isdir(save_directory): - logger.error(f"Vocabulary path ({save_directory}) should be a directory") + logger.error(f"Vocabulary path ({save_directory}) " + "should be a directory") return out_vocab_file = os.path.join( save_directory, - (filename_prefix + "-" if filename_prefix else "") - + VOCAB_FILES_NAMES["vocab_file"], + (filename_prefix + "-" if filename_prefix else "") + + VOCAB_FILES_NAMES["vocab_file"], ) if os.path.abspath(self.vocab_file) != os.path.abspath( - out_vocab_file - ) and os.path.isfile(self.vocab_file): + out_vocab_file) and os.path.isfile(self.vocab_file): copyfile(self.vocab_file, out_vocab_file) elif not os.path.isfile(self.vocab_file): with open(out_vocab_file, "wb") as fi: content_spiece_model = self.sp_model.serialized_model_proto() fi.write(content_spiece_model) - return (out_vocab_file,) + return (out_vocab_file, ) def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None): bos_token_id = [self.bos_token_id] if self.add_bos_token else [] @@ -194,7 +185,8 @@ def get_special_tokens_mask( already_has_special_tokens: bool = False, ) -> List[int]: """ - Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding + Retrieve sequence ids from a token list that has no special tokens + added. This method is called when adding special tokens using the tokenizer `prepare_for_model` method. Args: @@ -202,11 +194,14 @@ def get_special_tokens_mask( List of IDs. token_ids_1 (`List[int]`, *optional*): Optional second list of IDs for sequence pairs. - already_has_special_tokens (`bool`, *optional*, defaults to `False`): - Whether or not the token list is already formatted with special tokens for the model. + already_has_special_tokens (`bool`, *optional*, defaults to + `False`): + Whether or not the token list is already formatted with + special tokens for the model. Returns: - `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token. + `List[int]`: A list of integers in the range [0, 1]: + 1 for a special token, 0 for a sequence token. """ if already_has_special_tokens: return super().get_special_tokens_mask( @@ -220,20 +215,16 @@ def get_special_tokens_mask( if token_ids_1 is None: return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id - return ( - bos_token_id - + ([0] * len(token_ids_0)) - + eos_token_id - + bos_token_id - + ([0] * len(token_ids_1)) - + eos_token_id - ) + return (bos_token_id + ([0] * len(token_ids_0)) + eos_token_id + + bos_token_id + ([0] * len(token_ids_1)) + eos_token_id) def create_token_type_ids_from_sequences( - self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None - ) -> List[int]: + self, + token_ids_0: List[int], + token_ids_1: Optional[List[int]] = None) -> List[int]: """ - Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT + Creates a mask from the two sequences passed to be used in a + sequence-pair classification task. An ALBERT sequence pair mask has the following format: ``` @@ -250,7 +241,8 @@ def create_token_type_ids_from_sequences( Optional second list of IDs for sequence pairs. Returns: - `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s). + `List[int]`: List of [token type IDs](../glossary#token-type-ids) + according to the given sequence(s). 
""" bos_token_id = [self.bos_token_id] if self.add_bos_token else [] eos_token_id = [self.eos_token_id] if self.add_eos_token else [] diff --git a/vllm/utils.py b/vllm/utils.py index 5b94067cec777..fe6fd27962cd3 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -133,9 +133,10 @@ def get_max_shared_memory_bytes(gpu: int = 0) -> int: # the Neuron-X backend does not have the `cuda_utils` module. from vllm._C import cuda_utils - max_shared_mem = cuda_utils.get_max_shared_memory_per_block_device_attribute( - gpu) - # value 0 will cause MAX_SEQ_LEN become negative and test_attention.py will fail + max_shared_mem = ( + cuda_utils.get_max_shared_memory_per_block_device_attribute(gpu)) + # value 0 will cause MAX_SEQ_LEN become negative and test_attention.py + # will fail assert max_shared_mem > 0, "max_shared_mem can not be zero" return int(max_shared_mem) @@ -209,9 +210,8 @@ def get_nvcc_cuda_version() -> Optional[Version]: if not cuda_home: cuda_home = '/usr/local/cuda' if os.path.isfile(cuda_home + '/bin/nvcc'): - logger.info( - f'CUDA_HOME is not found in the environment. Using {cuda_home} as CUDA_HOME.' - ) + logger.info(f'CUDA_HOME is not found in the environment. ' + f'Using {cuda_home} as CUDA_HOME.') else: logger.warning( f'Not found nvcc in {cuda_home}. Skip cuda version check!') diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 0dd2309079403..7eac576e3f0fe 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -93,14 +93,13 @@ def load_model(self) -> None: scheduler_config=self.scheduler_config) self.model_memory_usage = m.consumed_memory - logger.info( - f"Loading model weights took {self.model_memory_usage / float(2**30):.4f} GB" - ) + logger.info(f"Loading model weights took " + f"{self.model_memory_usage / float(2**30):.4f} GB") if self.lora_config: - assert hasattr( - self.model, "supported_lora_modules" - ) and self.model.supported_lora_modules, "Model does not support LoRA" + assert hasattr(self.model, "supported_lora_modules" + ) and self.model.supported_lora_modules, ( + "Model does not support LoRA") assert hasattr( self.model, "embedding_modules"), "Model does not have embedding_modules" diff --git a/vllm/worker/neuron_worker.py b/vllm/worker/neuron_worker.py index 3229a21c11a38..340c079600c78 100644 --- a/vllm/worker/neuron_worker.py +++ b/vllm/worker/neuron_worker.py @@ -79,7 +79,8 @@ def profile_num_available_blocks( cpu_swap_space: int = 0, cache_dtype: str = "float16", ) -> Tuple[int, int]: - """Simply returns max_num_seqs as num_gpu_blocks, 0 as num_cpu_blocks.""" + """Simply returns max_num_seqs as num_gpu_blocks, 0 as + num_cpu_blocks.""" num_gpu_blocks = self.scheduler_config.max_num_seqs num_cpu_blocks = 0 return num_gpu_blocks, num_cpu_blocks @@ -177,7 +178,8 @@ def _init_distributed_environment( "distributed_init_method must be set if torch.distributed " "is not already initialized") else: - distributed_backend = distributed_backend if distributed_backend else "nccl" + distributed_backend = (distributed_backend + if distributed_backend else "nccl") torch.distributed.init_process_group( backend=distributed_backend, world_size=parallel_config.world_size, From 657061fdced8a33a60c1b09f5da2525de9da8f03 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 11 Mar 2024 00:54:51 -0700 Subject: [PATCH 081/196] [docs] Add LoRA support information for models (#3299) --- docs/source/models/lora.rst | 3 ++- docs/source/models/supported_models.rst | 27 ++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) 
diff --git a/docs/source/models/lora.rst b/docs/source/models/lora.rst index 21b18c75fc552..f05fafe9f8279 100644 --- a/docs/source/models/lora.rst +++ b/docs/source/models/lora.rst @@ -92,7 +92,8 @@ LoRA adapter requests if they were provided and ``max_loras`` is set high enough The following is an example request -.. code-block::bash +.. code-block:: bash + curl http://localhost:8000/v1/completions \ -H "Content-Type: application/json" \ -d '{ diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index 9d4ec663a16e5..4019e0bbd90fb 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -8,84 +8,109 @@ The following is the list of model architectures that are currently supported by Alongside each architecture, we include some popular models that use it. .. list-table:: - :widths: 25 25 50 + :widths: 25 25 50 5 :header-rows: 1 * - Architecture - Models - Example HuggingFace Models + - :ref:`LoRA ` * - :code:`AquilaForCausalLM` - Aquila - :code:`BAAI/Aquila-7B`, :code:`BAAI/AquilaChat-7B`, etc. + - ✅︎ * - :code:`BaiChuanForCausalLM` - Baichuan - :code:`baichuan-inc/Baichuan2-13B-Chat`, :code:`baichuan-inc/Baichuan-7B`, etc. + - * - :code:`ChatGLMModel` - ChatGLM - :code:`THUDM/chatglm2-6b`, :code:`THUDM/chatglm3-6b`, etc. + - * - :code:`DeciLMForCausalLM` - DeciLM - :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc. + - * - :code:`BloomForCausalLM` - BLOOM, BLOOMZ, BLOOMChat - :code:`bigscience/bloom`, :code:`bigscience/bloomz`, etc. + - * - :code:`FalconForCausalLM` - Falcon - :code:`tiiuae/falcon-7b`, :code:`tiiuae/falcon-40b`, :code:`tiiuae/falcon-rw-7b`, etc. + - * - :code:`GemmaForCausalLM` - Gemma - :code:`google/gemma-2b`, :code:`google/gemma-7b`, etc. + - ✅︎ * - :code:`GPT2LMHeadModel` - GPT-2 - :code:`gpt2`, :code:`gpt2-xl`, etc. + - * - :code:`GPTBigCodeForCausalLM` - StarCoder, SantaCoder, WizardCoder - :code:`bigcode/starcoder`, :code:`bigcode/gpt_bigcode-santacoder`, :code:`WizardLM/WizardCoder-15B-V1.0`, etc. + - * - :code:`GPTJForCausalLM` - GPT-J - :code:`EleutherAI/gpt-j-6b`, :code:`nomic-ai/gpt4all-j`, etc. + - * - :code:`GPTNeoXForCausalLM` - GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM - :code:`EleutherAI/gpt-neox-20b`, :code:`EleutherAI/pythia-12b`, :code:`OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, :code:`databricks/dolly-v2-12b`, :code:`stabilityai/stablelm-tuned-alpha-7b`, etc. + - * - :code:`InternLMForCausalLM` - InternLM - :code:`internlm/internlm-7b`, :code:`internlm/internlm-chat-7b`, etc. + - ✅︎ * - :code:`InternLM2ForCausalLM` - InternLM2 - :code:`internlm/internlm2-7b`, :code:`internlm/internlm2-chat-7b`, etc. + - * - :code:`LlamaForCausalLM` - LLaMA, LLaMA-2, Vicuna, Alpaca, Yi - :code:`meta-llama/Llama-2-13b-hf`, :code:`meta-llama/Llama-2-70b-hf`, :code:`openlm-research/open_llama_13b`, :code:`lmsys/vicuna-13b-v1.3`, :code:`01-ai/Yi-6B`, :code:`01-ai/Yi-34B`, etc. + - ✅︎ * - :code:`MistralForCausalLM` - Mistral, Mistral-Instruct - :code:`mistralai/Mistral-7B-v0.1`, :code:`mistralai/Mistral-7B-Instruct-v0.1`, etc. + - ✅︎ * - :code:`MixtralForCausalLM` - Mixtral-8x7B, Mixtral-8x7B-Instruct - :code:`mistralai/Mixtral-8x7B-v0.1`, :code:`mistralai/Mixtral-8x7B-Instruct-v0.1`, etc. + - ✅︎ * - :code:`MPTForCausalLM` - MPT, MPT-Instruct, MPT-Chat, MPT-StoryWriter - :code:`mosaicml/mpt-7b`, :code:`mosaicml/mpt-7b-storywriter`, :code:`mosaicml/mpt-30b`, etc. + - * - :code:`OLMoForCausalLM` - OLMo - :code:`allenai/OLMo-1B`, :code:`allenai/OLMo-7B`, etc. 
+ - * - :code:`OPTForCausalLM` - OPT, OPT-IML - :code:`facebook/opt-66b`, :code:`facebook/opt-iml-max-30b`, etc. + - * - :code:`OrionForCausalLM` - Orion - :code:`OrionStarAI/Orion-14B-Base`, :code:`OrionStarAI/Orion-14B-Chat`, etc. + - * - :code:`PhiForCausalLM` - Phi - :code:`microsoft/phi-1_5`, :code:`microsoft/phi-2`, etc. + - * - :code:`QWenLMHeadModel` - Qwen - :code:`Qwen/Qwen-7B`, :code:`Qwen/Qwen-7B-Chat`, etc. + - * - :code:`Qwen2ForCausalLM` - Qwen2 - :code:`Qwen/Qwen2-beta-7B`, :code:`Qwen/Qwen2-beta-7B-Chat`, etc. + - ✅︎ * - :code:`StableLmForCausalLM` - StableLM - :code:`stabilityai/stablelm-3b-4e1t/` , :code:`stabilityai/stablelm-base-alpha-7b-v2`, etc. + - If your model uses one of the above model architectures, you can seamlessly run your model with vLLM. Otherwise, please refer to :ref:`Adding a New Model ` for instructions on how to implement support for your model. From 4c922709b65ff5c0652ae36b93047016bdeaace8 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Mon, 11 Mar 2024 11:03:45 -0700 Subject: [PATCH 082/196] Add distributed model executor abstraction (#3191) --- docs/source/dev/engine/llm_engine.rst | 2 +- format.sh | 8 +- tests/lora/conftest.py | 3 +- vllm/__init__.py | 4 +- vllm/config.py | 7 +- vllm/engine/async_llm_engine.py | 106 +++--- vllm/engine/llm_engine.py | 446 +++----------------------- vllm/engine/ray_utils.py | 58 ++-- vllm/executor/__init__.py | 0 vllm/executor/executor_base.py | 75 +++++ vllm/executor/gpu_executor.py | 163 ++++++++++ vllm/executor/ray_gpu_executor.py | 442 +++++++++++++++++++++++++ vllm/executor/utils.py | 13 + 13 files changed, 818 insertions(+), 509 deletions(-) create mode 100644 vllm/executor/__init__.py create mode 100644 vllm/executor/executor_base.py create mode 100644 vllm/executor/gpu_executor.py create mode 100644 vllm/executor/ray_gpu_executor.py create mode 100644 vllm/executor/utils.py diff --git a/docs/source/dev/engine/llm_engine.rst b/docs/source/dev/engine/llm_engine.rst index b550a9b5faa62..1de6d7adc87c6 100644 --- a/docs/source/dev/engine/llm_engine.rst +++ b/docs/source/dev/engine/llm_engine.rst @@ -2,5 +2,5 @@ LLMEngine ================================= .. autoclass:: vllm.engine.llm_engine.LLMEngine - :members: add_request, abort_request, step, _init_cache + :members: add_request, abort_request, step :show-inheritance: \ No newline at end of file diff --git a/format.sh b/format.sh index eb2c5ab031626..ff30111123bee 100755 --- a/format.sh +++ b/format.sh @@ -95,13 +95,17 @@ echo 'vLLM yapf: Done' # echo 'vLLM mypy:' # mypy +CODESPELL_EXCLUDES=( + '--skip' '*docs/source/_build/**' +) + # check spelling of specified files spell_check() { codespell "$@" } spell_check_all(){ - codespell --toml pyproject.toml + codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" } # Spelling check of files that differ from main branch. @@ -116,7 +120,7 @@ spell_check_changed() { if ! 
git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \ - codespell + codespell "${CODESPELL_EXCLUDES[@]}" fi } diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py index 67273144ecd02..30a8ad03c8ada 100644 --- a/tests/lora/conftest.py +++ b/tests/lora/conftest.py @@ -152,4 +152,5 @@ def get_model_patched(model_config, device_config, **kwargs): @pytest.fixture def llama_2_7b_model_extra_embeddings( llama_2_7b_engine_extra_embeddings) -> nn.Module: - yield llama_2_7b_engine_extra_embeddings.driver_worker.model_runner.model + yield (llama_2_7b_engine_extra_embeddings.model_executor.driver_worker. + model_runner.model) diff --git a/vllm/__init__.py b/vllm/__init__.py index f1e30f5eb6e6e..5e40c3c20fcd2 100644 --- a/vllm/__init__.py +++ b/vllm/__init__.py @@ -3,7 +3,7 @@ from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.llm_engine import LLMEngine -from vllm.engine.ray_utils import initialize_cluster +from vllm.engine.ray_utils import initialize_ray_cluster from vllm.entrypoints.llm import LLM from vllm.outputs import CompletionOutput, RequestOutput from vllm.sampling_params import SamplingParams @@ -19,5 +19,5 @@ "EngineArgs", "AsyncLLMEngine", "AsyncEngineArgs", - "initialize_cluster", + "initialize_ray_cluster", ] diff --git a/vllm/config.py b/vllm/config.py index e893fe702c975..d2b68b6fa1fe2 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, ClassVar +from typing import TYPE_CHECKING, Optional, Union, ClassVar from dataclasses import dataclass import os from packaging.version import Version @@ -10,6 +10,9 @@ from vllm.transformers_utils.config import get_config from vllm.utils import get_cpu_memory, is_hip, is_neuron, get_nvcc_cuda_version +if TYPE_CHECKING: + from ray.util.placement_group import PlacementGroup + logger = init_logger(__name__) _GB = 1 << 30 @@ -397,6 +400,7 @@ def __init__( max_parallel_loading_workers: Optional[int] = None, disable_custom_all_reduce: bool = False, ray_workers_use_nsight: bool = False, + placement_group: Optional["PlacementGroup"] = None, ) -> None: self.pipeline_parallel_size = pipeline_parallel_size if is_neuron(): @@ -412,6 +416,7 @@ def __init__( self.max_parallel_loading_workers = max_parallel_loading_workers self.disable_custom_all_reduce = disable_custom_all_reduce self.ray_workers_use_nsight = ray_workers_use_nsight + self.placement_group = placement_group self.world_size = pipeline_parallel_size * self.tensor_parallel_size # Ray worker is not supported for Neuron backend. 
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 5629d1a863d04..0cee604c14d45 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -2,8 +2,8 @@ import os import time from functools import partial -from typing import (Any, Dict, Iterable, List, Optional, Set, Tuple, Type, - Union, AsyncIterator, Callable) +from typing import (Callable, Dict, Iterable, List, Optional, Set, Tuple, Type, + Union, AsyncIterator) from transformers import PreTrainedTokenizer @@ -11,7 +11,7 @@ from vllm.config import ModelConfig from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.llm_engine import LLMEngine -from vllm.engine.ray_utils import initialize_cluster, ray +from vllm.engine.ray_utils import initialize_ray_cluster, ray from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams @@ -208,17 +208,10 @@ async def step_async(self) -> List[RequestOutput]: if not scheduler_outputs.is_empty(): # Execute the model. - all_outputs = await self._run_workers_async( - "execute_model", - driver_kwargs={ - "seq_group_metadata_list": seq_group_metadata_list, - "blocks_to_swap_in": scheduler_outputs.blocks_to_swap_in, - "blocks_to_swap_out": scheduler_outputs.blocks_to_swap_out, - "blocks_to_copy": scheduler_outputs.blocks_to_copy, - }) - - # Only the driver worker returns the sampling results. - output = all_outputs[0] + output = await self.model_executor.execute_model_async( + seq_group_metadata_list, scheduler_outputs.blocks_to_swap_in, + scheduler_outputs.blocks_to_swap_out, + scheduler_outputs.blocks_to_copy) else: output = [] @@ -268,37 +261,8 @@ async def add_request_async( lora_request=lora_request, ) - async def _run_workers_async( - self, - method: str, - *args, - driver_args: Optional[List[Any]] = None, - driver_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, - ) -> Any: - """Runs the given method on all workers.""" - coros = [] - - if driver_args is None: - driver_args = args - if driver_kwargs is None: - driver_kwargs = kwargs - - # Run the driver worker asynchronously. - driver_executor = getattr(self.driver_worker, method) - coros.append(asyncio.get_event_loop().run_in_executor( - None, partial(driver_executor, *driver_args, **driver_kwargs))) - - # Run the ray workers asynchronously. - for worker in self.workers: - coros.append(worker.execute_method.remote(method, *args, **kwargs)) - - all_outputs = await asyncio.gather(*coros) - return all_outputs - - async def check_health_async(self): - """Raises an error if engine is unhealthy.""" - self._check_if_any_actor_is_dead() + async def check_health_async(self) -> None: + self.model_executor.check_health() class AsyncLLMEngine: @@ -353,6 +317,34 @@ def __init__(self, self._request_tracker: Optional[RequestTracker] = None self._errored_with: Optional[BaseException] = None + @classmethod + def from_engine_args(cls, + engine_args: AsyncEngineArgs, + start_engine_loop: bool = True) -> "AsyncLLMEngine": + """Creates an async LLM engine from the engine arguments.""" + # Create the engine configs. 
+ engine_configs = engine_args.create_engine_configs() + parallel_config = engine_configs[2] + if parallel_config.worker_use_ray or engine_args.engine_use_ray: + initialize_ray_cluster(parallel_config) + from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync + executor_class = RayGPUExecutorAsync + else: + assert parallel_config.world_size == 1, ( + "Ray is required if parallel_config.world_size > 1.") + from vllm.executor.gpu_executor import GPUExecutorAsync + executor_class = GPUExecutorAsync + # Create the async LLM engine. + engine = cls(parallel_config.worker_use_ray, + engine_args.engine_use_ray, + *engine_configs, + executor_class, + log_requests=not engine_args.disable_log_requests, + log_stats=not engine_args.disable_log_stats, + max_log_len=engine_args.max_log_len, + start_engine_loop=start_engine_loop) + return engine + @property def is_running(self) -> bool: return (self.background_loop is not None @@ -670,35 +662,13 @@ async def get_model_config(self) -> ModelConfig: else: return self.engine.get_model_config() - @classmethod - def from_engine_args(cls, - engine_args: AsyncEngineArgs, - start_engine_loop: bool = True) -> "AsyncLLMEngine": - """Creates an async LLM engine from the engine arguments.""" - # Create the engine configs. - engine_configs = engine_args.create_engine_configs() - parallel_config = engine_configs[2] - # Initialize the cluster. - placement_group = initialize_cluster(parallel_config, - engine_args.engine_use_ray) - # Create the async LLM engine. - engine = cls(parallel_config.worker_use_ray, - engine_args.engine_use_ray, - *engine_configs, - placement_group, - log_requests=not engine_args.disable_log_requests, - log_stats=not engine_args.disable_log_stats, - max_log_len=engine_args.max_log_len, - start_engine_loop=start_engine_loop) - return engine - async def do_log_stats(self) -> None: if self.engine_use_ray: await self.engine.do_log_stats.remote() else: self.engine.do_log_stats() - async def check_health(self): + async def check_health(self) -> None: """Raises an error if engine is unhealthy.""" t = time.perf_counter() logger.debug("Starting health check...") diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 6e045cd6d73c6..4cdad4180aa14 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1,11 +1,5 @@ -import copy -from collections import defaultdict -import os import time -import pickle -import importlib -from typing import (TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, - Union) +from typing import Dict, Iterable, List, Optional, Tuple, Type, Union from transformers import PreTrainedTokenizer @@ -15,8 +9,9 @@ ParallelConfig, SchedulerConfig, LoRAConfig) from vllm.core.scheduler import Scheduler, SchedulerOutputs from vllm.engine.arg_utils import EngineArgs +from vllm.executor.executor_base import ExecutorBase from vllm.engine.metrics import StatLogger, Stats -from vllm.engine.ray_utils import RayWorkerVllm, initialize_cluster, ray +from vllm.engine.ray_utils import initialize_ray_cluster from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams @@ -24,29 +19,11 @@ SequenceGroupOutput, SequenceOutput, SequenceStatus) from vllm.transformers_utils.tokenizer import (detokenize_incrementally, TokenizerGroup) -from vllm.utils import (Counter, set_cuda_visible_devices, get_ip, - get_open_port, get_distributed_init_method) - -if ray: - from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy - -if TYPE_CHECKING: - 
from ray.util.placement_group import PlacementGroup +from vllm.utils import Counter logger = init_logger(__name__) _LOCAL_LOGGING_INTERVAL_SEC = 5 -# A map between the device type (in device config) to its worker module. -DEVICE_TO_WORKER_MODULE_MAP = { - "cuda": "vllm.worker.worker", - "neuron": "vllm.worker.neuron_worker", -} - -# If the env var is set, it uses the Ray's compiled DAG API -# which optimizes the control plane overhead. -# Run VLLM with VLLM_USE_RAY_COMPILED_DAG=1 to enable it. -USE_RAY_COMPILED_DAG = bool(os.getenv("VLLM_USE_RAY_COMPILED_DAG", 0)) - class LLMEngine: """An LLM engine that receives requests and generates texts. @@ -71,8 +48,8 @@ class LLMEngine: parallel_config: The configuration related to distributed execution. scheduler_config: The configuration related to the request scheduler. device_config: The configuration related to the device. - placement_group: Ray placement group for distributed execution. - Required for distributed execution. + executor_class: The model executor class for managing distributed + execution. log_stats: Whether to log statistics. """ @@ -84,7 +61,7 @@ def __init__( scheduler_config: SchedulerConfig, device_config: DeviceConfig, lora_config: Optional[LoRAConfig], - placement_group: Optional["PlacementGroup"], + executor_class: Type[ExecutorBase], log_stats: bool, ) -> None: logger.info( @@ -121,33 +98,13 @@ def __init__( self._init_tokenizer() self.seq_counter = Counter() - # Create the parallel GPU workers. - if self.parallel_config.worker_use_ray: - # Disable Ray usage stats collection. - ray_usage = os.environ.get("RAY_USAGE_STATS_ENABLED", "0") - if ray_usage != "1": - os.environ["RAY_USAGE_STATS_ENABLED"] = "0" - # Pass additional arguments to initialize the worker - additional_ray_args = {} - if self.parallel_config.ray_workers_use_nsight: - logger.info("Configuring Ray workers to use nsight.") - additional_ray_args = { - "runtime_env": { - "nsight": { - "t": "cuda,cudnn,cublas", - "o": "'worker_process_%p'", - "cuda-graph-trace": "node", - } - } - } - self._init_workers_ray(placement_group, **additional_ray_args) - else: - self._init_workers() - - # Profile the memory usage and initialize the cache. - self._init_cache() + self.model_executor = executor_class(model_config, cache_config, + parallel_config, scheduler_config, + device_config, lora_config) # Create the scheduler. + # NOTE: the cache_config here have been updated with the numbers of + # GPU and CPU blocks, which are profiled in the distributed executor. self.scheduler = Scheduler(scheduler_config, cache_config, lora_config) # Metric Logging. @@ -157,9 +114,29 @@ def __init__( labels=dict(model_name=model_config.model)) self.stat_logger.info("cache_config", self.cache_config) - self.forward_dag = None - if USE_RAY_COMPILED_DAG: - self.forward_dag = self._compiled_ray_dag() + @classmethod + def from_engine_args(cls, engine_args: EngineArgs) -> "LLMEngine": + """Creates an LLM engine from the engine arguments.""" + # Create the engine configs. + engine_configs = engine_args.create_engine_configs() + parallel_config = engine_configs[2] + + # Initialize the cluster and specify the executor class. 
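# Illustrative sketch, not part of this patch: how the new classmethod is
# typically reached (the model name is an arbitrary example; a single-GPU
# configuration takes the GPUExecutor branch selected below).
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine

example_engine = LLMEngine.from_engine_args(EngineArgs(model="facebook/opt-125m"))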
+ if parallel_config.worker_use_ray: + initialize_ray_cluster(parallel_config) + from vllm.executor.ray_gpu_executor import RayGPUExecutor + executor_class = RayGPUExecutor + else: + assert parallel_config.world_size == 1, ( + "Ray is required if parallel_config.world_size > 1.") + from vllm.executor.gpu_executor import GPUExecutor + executor_class = GPUExecutor + + # Create the LLM engine. + engine = cls(*engine_configs, + executor_class=executor_class, + log_stats=not engine_args.disable_log_stats) + return engine def __reduce__(self): # This is to ensure that the LLMEngine is not referenced in @@ -173,39 +150,6 @@ def get_tokenizer_for_seq(self, sequence: Sequence) -> "PreTrainedTokenizer": return self.tokenizer.get_lora_tokenizer(sequence.lora_request) - def _dispatch_worker(self): - worker_module = DEVICE_TO_WORKER_MODULE_MAP[ - self.device_config.device_type] - imported_worker = importlib.import_module(worker_module) - Worker = imported_worker.Worker - return Worker - - def _init_workers(self): - # Lazy import the Worker to avoid importing torch.cuda/xformers - # before CUDA_VISIBLE_DEVICES is set in the Worker - Worker = self._dispatch_worker() - - assert self.parallel_config.world_size == 1, ( - "Ray is required if parallel_config.world_size > 1.") - - self.workers: List[Worker] = [] - distributed_init_method = get_distributed_init_method( - get_ip(), get_open_port()) - self.driver_worker = Worker( - self.model_config, - self.parallel_config, - self.scheduler_config, - self.device_config, - local_rank=0, - rank=0, - distributed_init_method=distributed_init_method, - lora_config=self.lora_config, - kv_cache_dtype=self.cache_config.cache_dtype, - is_driver_worker=True, - ) - self._run_workers("init_model") - self._run_workers("load_model") - def _init_tokenizer(self, **tokenizer_init_kwargs): init_kwargs = dict( enable_lora=bool(self.lora_config), @@ -218,126 +162,6 @@ def _init_tokenizer(self, **tokenizer_init_kwargs): self.tokenizer: TokenizerGroup = TokenizerGroup( self.model_config.tokenizer, **init_kwargs) - def _init_workers_ray(self, placement_group: "PlacementGroup", - **ray_remote_kwargs): - if self.parallel_config.tensor_parallel_size == 1: - num_gpus = self.cache_config.gpu_memory_utilization - else: - num_gpus = 1 - - self.driver_dummy_worker: RayWorkerVllm = None - self.workers: List[RayWorkerVllm] = [] - - driver_ip = get_ip() - for bundle_id, bundle in enumerate(placement_group.bundle_specs): - if not bundle.get("GPU", 0): - continue - scheduling_strategy = PlacementGroupSchedulingStrategy( - placement_group=placement_group, - placement_group_capture_child_tasks=True, - placement_group_bundle_index=bundle_id, - ) - worker = ray.remote( - num_cpus=0, - num_gpus=num_gpus, - scheduling_strategy=scheduling_strategy, - **ray_remote_kwargs, - )(RayWorkerVllm).remote(self.model_config.trust_remote_code) - - worker_ip = ray.get(worker.get_node_ip.remote()) - if worker_ip == driver_ip and self.driver_dummy_worker is None: - # If the worker is on the same node as the driver, we use it - # as the resource holder for the driver process. - self.driver_dummy_worker = worker - else: - self.workers.append(worker) - - if self.driver_dummy_worker is None: - raise ValueError( - "Ray does not allocate any GPUs on the driver node. 
Consider " - "adjusting the Ray placement group or running the driver on a " - "GPU node.") - - driver_node_id, driver_gpu_ids = ray.get( - self.driver_dummy_worker.get_node_and_gpu_ids.remote()) - worker_node_and_gpu_ids = ray.get( - [worker.get_node_and_gpu_ids.remote() for worker in self.workers]) - - node_workers = defaultdict(list) - node_gpus = defaultdict(list) - - node_workers[driver_node_id].append(0) - node_gpus[driver_node_id].extend(driver_gpu_ids) - for i, (node_id, gpu_ids) in enumerate(worker_node_and_gpu_ids, - start=1): - node_workers[node_id].append(i) - node_gpus[node_id].extend(gpu_ids) - for node_id, gpu_ids in node_gpus.items(): - node_gpus[node_id] = sorted(gpu_ids) - - # Set CUDA_VISIBLE_DEVICES for the driver. - set_cuda_visible_devices(node_gpus[driver_node_id]) - for worker, (node_id, _) in zip(self.workers, worker_node_and_gpu_ids): - worker.set_cuda_visible_devices.remote(node_gpus[node_id]) - - distributed_init_method = get_distributed_init_method( - driver_ip, get_open_port()) - - # Lazy import the Worker to avoid importing torch.cuda/xformers - # before CUDA_VISIBLE_DEVICES is set in the Worker - Worker = self._dispatch_worker() - - # Initialize torch distributed process group for the workers. - model_config = copy.deepcopy(self.model_config) - parallel_config = copy.deepcopy(self.parallel_config) - scheduler_config = copy.deepcopy(self.scheduler_config) - device_config = copy.deepcopy(self.device_config) - lora_config = copy.deepcopy(self.lora_config) - kv_cache_dtype = self.cache_config.cache_dtype - - for rank, (worker, (node_id, - _)) in enumerate(zip(self.workers, - worker_node_and_gpu_ids), - start=1): - local_rank = node_workers[node_id].index(rank) - worker.init_worker.remote( - lambda rank=rank, local_rank=local_rank: Worker( - model_config, - parallel_config, - scheduler_config, - device_config, - local_rank, - rank, - distributed_init_method, - lora_config=lora_config, - kv_cache_dtype=kv_cache_dtype, - )) - - driver_rank = 0 - driver_local_rank = node_workers[driver_node_id].index(driver_rank) - self.driver_worker = Worker( - self.model_config, - self.parallel_config, - self.scheduler_config, - self.device_config, - driver_local_rank, - driver_rank, - distributed_init_method, - lora_config=self.lora_config, - kv_cache_dtype=kv_cache_dtype, - is_driver_worker=True, - ) - - # don't use cupy for eager mode - self._run_workers("init_model", - cupy_port=get_open_port() - if not model_config.enforce_eager else None) - self._run_workers( - "load_model", - max_concurrent_workers=self.parallel_config. - max_parallel_loading_workers, - ) - def _verify_args(self) -> None: self.model_config.verify_with_parallel_config(self.parallel_config) self.cache_config.verify_with_parallel_config(self.parallel_config) @@ -346,81 +170,6 @@ def _verify_args(self) -> None: self.lora_config.verify_with_scheduler_config( self.scheduler_config) - def _init_cache(self) -> None: - """Profiles the memory usage and initializes the KV cache. - - The engine will first conduct a profiling of the existing memory usage. - Then, it calculate the maximum possible number of GPU and CPU blocks - that can be allocated with the remaining free memory. - More details can be found in the - :meth:`~vllm.worker.worker.Worker.profile_num_available_blocks` method - from class :class:`~vllm.worker.Worker`. - - Afterwards, as there may be multiple workers, - we take the minimum number of blocks across all workers - to ensure this can be applied to all of them. 
- - Finally, the engine will initialize the KV cache - with the calculated number of blocks. - - .. tip:: - You may limit the usage of GPU memory - by adjusting the `gpu_memory_utilization` parameters. - """ - # Get the maximum number of blocks that can be allocated on GPU and CPU. - num_blocks = self._run_workers( - "profile_num_available_blocks", - block_size=self.cache_config.block_size, - gpu_memory_utilization=self.cache_config.gpu_memory_utilization, - cpu_swap_space=self.cache_config.swap_space_bytes, - cache_dtype=self.cache_config.cache_dtype, - ) - - # Since we use a shared centralized controller, we take the minimum - # number of blocks across all workers to make sure all the memory - # operators can be applied to all workers. - num_gpu_blocks = min(b[0] for b in num_blocks) - num_cpu_blocks = min(b[1] for b in num_blocks) - # FIXME(woosuk): Change to debug log. - logger.info(f"# GPU blocks: {num_gpu_blocks}, " - f"# CPU blocks: {num_cpu_blocks}") - - if num_gpu_blocks <= 0: - raise ValueError("No available memory for the cache blocks. " - "Try increasing `gpu_memory_utilization` when " - "initializing the engine.") - max_seq_len = self.cache_config.block_size * num_gpu_blocks - if self.model_config.max_model_len > max_seq_len: - raise ValueError( - f"The model's max seq len ({self.model_config.max_model_len}) " - "is larger than the maximum number of tokens that can be " - f"stored in KV cache ({max_seq_len}). Try increasing " - "`gpu_memory_utilization` or decreasing `max_model_len` when " - "initializing the engine.") - - self.cache_config.num_gpu_blocks = num_gpu_blocks - self.cache_config.num_cpu_blocks = num_cpu_blocks - - # Initialize the cache. - self._run_workers("init_cache_engine", cache_config=self.cache_config) - # Warm up the model. This includes capturing the model into CUDA graph - # if enforce_eager is False. - self._run_workers("warm_up_model") - - @classmethod - def from_engine_args(cls, engine_args: EngineArgs) -> "LLMEngine": - """Creates an LLM engine from the engine arguments.""" - # Create the engine configs. - engine_configs = engine_args.create_engine_configs() - parallel_config = engine_configs[2] - # Initialize the cluster. - placement_group = initialize_cluster(parallel_config) - # Create the LLM engine. - engine = cls(*engine_configs, - placement_group, - log_stats=not engine_args.disable_log_stats) - return engine - def encode_request( self, request_id: str, # pylint: disable=unused-argument @@ -826,7 +575,7 @@ def step(self) -> List[RequestOutput]: - A Sequence Group (SG) refer to a group of sequences that are generated from the same prompt. - - Step 2: Calls the workers to execute the model. + - Step 2: Calls the distributed executor to execute the model. - Step 3: Processes the model output. This mainly includes: - Decodes the relevant outputs. @@ -862,19 +611,10 @@ def step(self) -> List[RequestOutput]: seq_group_metadata_list, scheduler_outputs = self.scheduler.schedule() if not scheduler_outputs.is_empty(): - # Execute the model. - all_outputs = self._run_workers( - "execute_model", - driver_kwargs={ - "seq_group_metadata_list": seq_group_metadata_list, - "blocks_to_swap_in": scheduler_outputs.blocks_to_swap_in, - "blocks_to_swap_out": scheduler_outputs.blocks_to_swap_out, - "blocks_to_copy": scheduler_outputs.blocks_to_copy, - }, - use_ray_compiled_dag=USE_RAY_COMPILED_DAG) - - # Only the driver worker returns the sampling results. 
- output = all_outputs[0] + output = self.model_executor.execute_model( + seq_group_metadata_list, scheduler_outputs.blocks_to_swap_in, + scheduler_outputs.blocks_to_swap_out, + scheduler_outputs.blocks_to_copy) else: output = [] @@ -1043,111 +783,13 @@ def _finalize_sequence(self, seq: Sequence, seq.output_text = seq.output_text[:-len(stop_string)] def add_lora(self, lora_request: LoRARequest) -> bool: - assert lora_request.lora_int_id > 0, "lora_id must be greater than 0." - return self._run_workers( - "add_lora", - lora_request=lora_request, - ) + return self.model_executor.add_lora(lora_request) def remove_lora(self, lora_id: int) -> bool: - assert lora_id > 0, "lora_id must be greater than 0." - return self._run_workers( - "remove_lora", - lora_id=lora_id, - ) + return self.model_executor.remove_lora(lora_id) def list_loras(self) -> List[int]: - return self._run_workers("list_loras") - - def _run_workers( - self, - method: str, - *args, - driver_args: Optional[List[Any]] = None, - driver_kwargs: Optional[Dict[str, Any]] = None, - max_concurrent_workers: Optional[int] = None, - use_ray_compiled_dag: bool = False, - **kwargs, - ) -> Any: - """Runs the given method on all workers.""" - - if max_concurrent_workers: - raise NotImplementedError( - "max_concurrent_workers is not supported yet.") - - if use_ray_compiled_dag: - # Right now, compiled DAG can only accept a single - # input. TODO(sang): Fix it. - output_channels = self.forward_dag.execute(1) - else: - # Start the ray workers first. - ray_worker_outputs = [ - worker.execute_method.remote(method, *args, **kwargs) - for worker in self.workers - ] - - if driver_args is None: - driver_args = args - if driver_kwargs is None: - driver_kwargs = kwargs - - # Start the driver worker after all the ray workers. - driver_worker_output = getattr(self.driver_worker, - method)(*driver_args, **driver_kwargs) - - # Get the results of the ray workers. - if self.workers: - if use_ray_compiled_dag: - try: - ray_worker_outputs = [ - pickle.loads(chan.begin_read()) - for chan in output_channels - ] - finally: - # Has to call end_read in order to reuse the DAG. - for chan in output_channels: - chan.end_read() - else: - ray_worker_outputs = ray.get(ray_worker_outputs) - - return [driver_worker_output] + ray_worker_outputs - - def _compiled_ray_dag(self): - import pkg_resources - required_version = "2.9" - current_version = pkg_resources.get_distribution("ray").version - if current_version < required_version: - raise ValueError(f"Ray version {required_version} or greater is " - f"required, but found {current_version}") - - from ray.dag import MultiOutputNode, InputNode - assert self.parallel_config.worker_use_ray - - # Right now, compiled DAG requires at least 1 arg. We send - # a dummy value for now. It will be fixed soon. 
- with InputNode() as input_data: - forward_dag = MultiOutputNode([ - worker.execute_model_compiled_dag_remote.bind(input_data) - for worker in self.workers - ]) - return forward_dag.experimental_compile() + return self.model_executor.list_loras() def check_health(self) -> None: - """Raises an error if engine is unhealthy.""" - self._check_if_any_actor_is_dead() - - def _check_if_any_actor_is_dead(self): - if not self.parallel_config.worker_use_ray: - return - - if not self.workers: - return - - dead_actors = [] - for actor in self.workers: - actor_state = ray.state.actors(actor._ray_actor_id.hex()) # pylint: disable=protected-access - if actor_state["State"] == "DEAD": - dead_actors.append(actor) - if dead_actors: - raise RuntimeError("At least one Worker is dead. " - f"Dead Workers: {dead_actors}. ") + self.model_executor.check_health() diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py index bbcbbdfea2f00..742f3dc575190 100644 --- a/vllm/engine/ray_utils.py +++ b/vllm/engine/ray_utils.py @@ -1,6 +1,6 @@ import pickle -from typing import Optional, List, Tuple, TYPE_CHECKING +from typing import Optional, List, Tuple from vllm.config import ParallelConfig from vllm.logger import init_logger @@ -65,45 +65,38 @@ def execute_model_compiled_dag_remote(self, ignored): ray = None RayWorkerVllm = None -if TYPE_CHECKING: - from ray.util.placement_group import PlacementGroup - -def initialize_cluster( +def initialize_ray_cluster( parallel_config: ParallelConfig, - engine_use_ray: bool = False, ray_address: Optional[str] = None, -) -> Optional["PlacementGroup"]: - """Initialize the distributed cluster probably with Ray. +): + """Initialize the distributed cluster with Ray. + + it will connect to the Ray cluster and create a placement group + for the workers, which includes the specification of the resources + for each distributed worker. Args: parallel_config: The configurations for parallel execution. - engine_use_ray: Whether to use Ray for async engine. ray_address: The address of the Ray cluster. If None, uses the default Ray cluster address. - - Returns: - An optional `PlacementGroup`. It includes the specification - of the resources for each distributed worker. None if Ray is - not used. """ - if parallel_config.worker_use_ray or engine_use_ray: - if ray is None: - raise ImportError( - "Ray is not installed. Please install Ray to use distributed " - "serving.") - # Connect to a ray cluster. - if is_hip(): - ray.init(address=ray_address, - ignore_reinit_error=True, - num_gpus=parallel_config.world_size) - else: - ray.init(address=ray_address, ignore_reinit_error=True) - - if not parallel_config.worker_use_ray: - assert parallel_config.world_size == 1, ( - "Ray is required if parallel_config.world_size > 1.") - return None + if ray is None: + raise ImportError( + "Ray is not installed. Please install Ray to use distributed " + "serving.") + + # Connect to a ray cluster. + if is_hip(): + ray.init(address=ray_address, + ignore_reinit_error=True, + num_gpus=parallel_config.world_size) + else: + ray.init(address=ray_address, ignore_reinit_error=True) + + if parallel_config.placement_group: + # Placement group is already set. + return # Create placement group for worker processes current_placement_group = ray.util.get_current_placement_group() @@ -138,4 +131,5 @@ def initialize_cluster( # if they cannot be provisioned. 
ray.get(current_placement_group.ready(), timeout=1800) - return current_placement_group + # Set the placement group in the parallel config + parallel_config.placement_group = current_placement_group diff --git a/vllm/executor/__init__.py b/vllm/executor/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py new file mode 100644 index 0000000000000..30717e8a87358 --- /dev/null +++ b/vllm/executor/executor_base.py @@ -0,0 +1,75 @@ +from abc import ABC, abstractmethod +from typing import Dict, List, Optional + +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.lora.request import LoRARequest +from vllm.sequence import SamplerOutput, SequenceGroupMetadata + + +class ExecutorBase(ABC): + """Base class for all executors. + + An executor is responsible for executing the model on a specific device + type (e.g., CPU, GPU, Neuron, etc.). Or it can be a distributed executor + that can execute the model on multiple devices. + """ + + @abstractmethod + def __init__( + self, + model_config: ModelConfig, + cache_config: CacheConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + lora_config: Optional[LoRAConfig], + ) -> None: + raise NotImplementedError + + @abstractmethod + def execute_model(self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]]) -> SamplerOutput: + """Executes one model step on the given sequences.""" + raise NotImplementedError + + @abstractmethod + def add_lora(self, lora_request: LoRARequest) -> bool: + raise NotImplementedError + + @abstractmethod + def remove_lora(self, lora_id: int) -> bool: + raise NotImplementedError + + @abstractmethod + def list_loras(self) -> List[int]: + raise NotImplementedError + + @abstractmethod + def check_health(self) -> None: + """Checks if the executor is healthy. If not, it should raise an + exception.""" + raise NotImplementedError + + +class ExecutorAsyncBase(ExecutorBase): + + @abstractmethod + async def execute_model_async( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + ) -> SamplerOutput: + """Executes one model step on the given sequences.""" + raise NotImplementedError + + @abstractmethod + async def check_health_async(self) -> None: + """Checks if the executor is healthy. If not, it should raise an + exception.""" + raise NotImplementedError diff --git a/vllm/executor/gpu_executor.py b/vllm/executor/gpu_executor.py new file mode 100644 index 0000000000000..9019ee7763c77 --- /dev/null +++ b/vllm/executor/gpu_executor.py @@ -0,0 +1,163 @@ +import importlib +from typing import Dict, List, Optional + +from vllm.lora.request import LoRARequest +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase +from vllm.executor.utils import check_block_size_valid +from vllm.logger import init_logger +from vllm.sequence import SamplerOutput, SequenceGroupMetadata +from vllm.utils import (get_ip, get_open_port, get_distributed_init_method, + make_async) + +logger = init_logger(__name__) + +# A map between the device type (in device config) to its worker module. 
+DEVICE_TO_WORKER_MODULE_MAP = { + "cuda": "vllm.worker.worker", + "neuron": "vllm.worker.neuron_worker", +} + + +class GPUExecutor(ExecutorBase): + + def __init__( + self, + model_config: ModelConfig, + cache_config: CacheConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + lora_config: Optional[LoRAConfig], + ) -> None: + self.model_config = model_config + self.cache_config = cache_config + self.lora_config = lora_config + self.parallel_config = parallel_config + self.scheduler_config = scheduler_config + self.device_config = device_config + + # Instantiate the worker and load the model to GPU. + self._init_worker() + + # Profile the memory usage and initialize the cache. + self._init_cache() + + def _dispatch_worker(self): + worker_module = DEVICE_TO_WORKER_MODULE_MAP[ + self.device_config.device_type] + imported_worker = importlib.import_module(worker_module) + Worker = imported_worker.Worker + return Worker + + def _init_worker(self): + # Lazy import the Worker to avoid importing torch.cuda/xformers + # before CUDA_VISIBLE_DEVICES is set in the Worker + Worker = self._dispatch_worker() + + assert self.parallel_config.world_size == 1, ( + "GPUExecutor only supports single GPU.") + + distributed_init_method = get_distributed_init_method( + get_ip(), get_open_port()) + self.driver_worker = Worker( + self.model_config, + self.parallel_config, + self.scheduler_config, + self.device_config, + local_rank=0, + rank=0, + distributed_init_method=distributed_init_method, + lora_config=self.lora_config, + kv_cache_dtype=self.cache_config.cache_dtype, + is_driver_worker=True, + ) + self.driver_worker.init_model() + self.driver_worker.load_model() + + def _init_cache(self) -> None: + """Profiles the memory usage and initializes the KV cache. + + The engine first profiles the existing memory usage. + Then, it allocates the remaining memory for KV blocks. + + .. tip:: + You may limit the usage of GPU memory + by adjusting the `gpu_memory_utilization` parameter. + """ + # Get the maximum number of blocks that can be allocated on GPU and CPU. + num_gpu_blocks, num_cpu_blocks = ( + self.driver_worker.profile_num_available_blocks( + block_size=self.cache_config.block_size, + gpu_memory_utilization=self.cache_config. + gpu_memory_utilization, + cpu_swap_space=self.cache_config.swap_space_bytes, + cache_dtype=self.cache_config.cache_dtype, + )) + + logger.info(f"# GPU blocks: {num_gpu_blocks}, " + f"# CPU blocks: {num_cpu_blocks}") + + check_block_size_valid(num_gpu_blocks, self.cache_config.block_size, + self.model_config.max_model_len) + + self.cache_config.num_gpu_blocks = num_gpu_blocks + self.cache_config.num_cpu_blocks = num_cpu_blocks + + # Initialize the cache. + self.driver_worker.init_cache_engine(cache_config=self.cache_config) + # Warm up the model. This includes capturing the model into CUDA graph + # if enforce_eager is False. + self.driver_worker.warm_up_model() + + def execute_model(self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]]) -> SamplerOutput: + output = self.driver_worker.execute_model( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy, + ) + return output + + def add_lora(self, lora_request: LoRARequest) -> bool: + assert lora_request.lora_int_id > 0, "lora_id must be greater than 0." 
+ return self.driver_worker.add_lora(lora_request) + + def remove_lora(self, lora_id: int) -> bool: + assert lora_id > 0, "lora_id must be greater than 0." + return self.driver_worker.remove_lora(lora_id) + + def list_loras(self) -> List[int]: + return self.driver_worker.list_loras() + + def check_health(self) -> None: + # GPUExecutor will always be healthy as long as + # it's running. + return + + +class GPUExecutorAsync(GPUExecutor, ExecutorAsyncBase): + + async def execute_model_async( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + ) -> SamplerOutput: + output = await make_async(self.driver_worker.execute_model)( + seq_group_metadata_list=seq_group_metadata_list, + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy=blocks_to_copy) + return output + + async def check_health_async(self) -> None: + # GPUExecutor will always be healthy as long as + # it's running. + return diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py new file mode 100644 index 0000000000000..261fcfb7dad9b --- /dev/null +++ b/vllm/executor/ray_gpu_executor.py @@ -0,0 +1,442 @@ +import asyncio +import copy +from collections import defaultdict +import os +import pickle +import importlib +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.engine.ray_utils import RayWorkerVllm, ray +from vllm.executor.executor_base import ExecutorAsyncBase, ExecutorBase +from vllm.executor.utils import check_block_size_valid +from vllm.logger import init_logger +from vllm.lora.request import LoRARequest +from vllm.sequence import SamplerOutput, SequenceGroupMetadata +from vllm.utils import (set_cuda_visible_devices, get_ip, get_open_port, + get_distributed_init_method, make_async) + +if ray is not None: + from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy + +if TYPE_CHECKING: + from ray.util.placement_group import PlacementGroup + +logger = init_logger(__name__) + +# A map between the device type (in device config) to its worker module. +DEVICE_TO_WORKER_MODULE_MAP = { + "cuda": "vllm.worker.worker", + "neuron": "vllm.worker.neuron_worker", +} + +# If the env var is set, it uses the Ray's compiled DAG API +# which optimizes the control plane overhead. +# Run vLLM with VLLM_USE_RAY_COMPILED_DAG=1 to enable it. +USE_RAY_COMPILED_DAG = bool(os.getenv("VLLM_USE_RAY_COMPILED_DAG", 0)) + + +class RayGPUExecutor(ExecutorBase): + + def __init__( + self, + model_config: ModelConfig, + cache_config: CacheConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + lora_config: Optional[LoRAConfig], + ) -> None: + self.model_config = model_config + self.cache_config = cache_config + self.lora_config = lora_config + self.parallel_config = parallel_config + self.scheduler_config = scheduler_config + self.device_config = device_config + + assert self.parallel_config.worker_use_ray + placement_group = self.parallel_config.placement_group + + # Disable Ray usage stats collection. + ray_usage = os.environ.get("RAY_USAGE_STATS_ENABLED", "0") + if ray_usage != "1": + os.environ["RAY_USAGE_STATS_ENABLED"] = "0" + + # Create the parallel GPU workers. 
+ self._init_workers_ray(placement_group) + + # Profile the memory usage and initialize the cache. + self._init_cache() + + self.forward_dag = None + if USE_RAY_COMPILED_DAG: + self.forward_dag = self._compiled_ray_dag() + + def _dispatch_worker(self): + worker_module = DEVICE_TO_WORKER_MODULE_MAP[ + self.device_config.device_type] + imported_worker = importlib.import_module(worker_module) + Worker = imported_worker.Worker + return Worker + + def _init_workers_ray(self, placement_group: "PlacementGroup", + **ray_remote_kwargs): + if self.parallel_config.tensor_parallel_size == 1: + # For single GPU case, we use a ray worker with constrained memory. + num_gpus = self.cache_config.gpu_memory_utilization + else: + # Otherwise, the ray workers are allocated with a full GPU. + num_gpus = 1 + + # The driver dummy worker does not actually use any resources. + # It holds the resource for the driver worker. + self.driver_dummy_worker: RayWorkerVllm = None + # The remaining workers are the actual ray actors. + self.workers: List[RayWorkerVllm] = [] + + # Create the workers. + driver_ip = get_ip() + for bundle_id, bundle in enumerate(placement_group.bundle_specs): + if not bundle.get("GPU", 0): + continue + scheduling_strategy = PlacementGroupSchedulingStrategy( + placement_group=placement_group, + placement_group_capture_child_tasks=True, + placement_group_bundle_index=bundle_id, + ) + worker = ray.remote( + num_cpus=0, + num_gpus=num_gpus, + scheduling_strategy=scheduling_strategy, + **ray_remote_kwargs, + )(RayWorkerVllm).remote(self.model_config.trust_remote_code) + + worker_ip = ray.get(worker.get_node_ip.remote()) + if worker_ip == driver_ip and self.driver_dummy_worker is None: + # If the worker is on the same node as the driver, we use it + # as the resource holder for the driver process. + self.driver_dummy_worker = worker + else: + # Else, added to the list of workers. + self.workers.append(worker) + + if self.driver_dummy_worker is None: + raise ValueError( + "Ray does not allocate any GPUs on the driver node. Consider " + "adjusting the Ray placement group or running the driver on a " + "GPU node.") + + # Get the set of GPU IDs used on each node. + driver_node_id, driver_gpu_ids = ray.get( + self.driver_dummy_worker.get_node_and_gpu_ids.remote()) + worker_node_and_gpu_ids = ray.get( + [worker.get_node_and_gpu_ids.remote() for worker in self.workers]) + + node_workers = defaultdict(list) + node_gpus = defaultdict(list) + + node_workers[driver_node_id].append(0) + node_gpus[driver_node_id].extend(driver_gpu_ids) + for i, (node_id, gpu_ids) in enumerate(worker_node_and_gpu_ids, + start=1): + node_workers[node_id].append(i) + node_gpus[node_id].extend(gpu_ids) + for node_id, gpu_ids in node_gpus.items(): + node_gpus[node_id] = sorted(gpu_ids) + + # Set CUDA_VISIBLE_DEVICES for the driver and workers. 
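# Illustrative sketch, not part of this patch (node names and GPU ids are
# made up): with two nodes of two GPUs each, the bookkeeping above yields
node_workers_example = {"node-A": [0, 1], "node-B": [2, 3]}  # global ranks per node
node_gpus_example = {"node-A": [0, 1], "node-B": [0, 1]}     # sorted GPU ids per node
# Each rank then derives local_rank = node_workers[node_id].index(rank), and
# CUDA_VISIBLE_DEVICES for a process is set to all GPU ids of its node.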
+ set_cuda_visible_devices(node_gpus[driver_node_id]) + for worker, (node_id, _) in zip(self.workers, worker_node_and_gpu_ids): + worker.set_cuda_visible_devices.remote(node_gpus[node_id]) + + distributed_init_method = get_distributed_init_method( + driver_ip, get_open_port()) + + # Lazy import the Worker to avoid importing torch.cuda/xformers + # before CUDA_VISIBLE_DEVICES is set in the Worker + Worker = self._dispatch_worker() + + model_config = copy.deepcopy(self.model_config) + parallel_config = copy.deepcopy(self.parallel_config) + scheduler_config = copy.deepcopy(self.scheduler_config) + device_config = copy.deepcopy(self.device_config) + lora_config = copy.deepcopy(self.lora_config) + kv_cache_dtype = self.cache_config.cache_dtype + + # Initialize the actual workers with the Worker class. + for rank, (worker, (node_id, _)) in enumerate( + zip(self.workers, worker_node_and_gpu_ids), + start=1, + ): + local_rank = node_workers[node_id].index(rank) + worker.init_worker.remote( + lambda rank=rank, local_rank=local_rank: Worker( + model_config, + parallel_config, + scheduler_config, + device_config, + local_rank, + rank, + distributed_init_method, + lora_config=lora_config, + kv_cache_dtype=kv_cache_dtype, + )) + + # Initialize the driver worker with the Worker class. + driver_rank = 0 + driver_local_rank = node_workers[driver_node_id].index(driver_rank) + self.driver_worker = Worker( + self.model_config, + self.parallel_config, + self.scheduler_config, + self.device_config, + driver_local_rank, + driver_rank, + distributed_init_method, + lora_config=self.lora_config, + kv_cache_dtype=kv_cache_dtype, + is_driver_worker=True, + ) + + # FIXME(woosuk): We are not properly initializing cupy NCCL when + # we have multiple nodes. + self._run_workers("init_model", + cupy_port=get_open_port() + if not model_config.enforce_eager else None) + self._run_workers( + "load_model", + max_concurrent_workers=self.parallel_config. + max_parallel_loading_workers, + ) + + def _init_cache(self) -> None: + """Profiles the memory usage and initializes the KV cache. + + The engine will first conduct a profiling of the existing memory usage. + Then, it calculate the maximum possible number of GPU and CPU blocks + that can be allocated with the remaining free memory. + More details can be found in the + :meth:`~vllm.worker.worker.Worker.profile_num_available_blocks` method + from class :class:`~vllm.worker.Worker`. + + Afterwards, as there may be multiple workers, + we take the minimum number of blocks across all workers + to ensure this can be applied to all of them. + + Finally, the engine will initialize the KV cache + with the calculated number of blocks. + + .. tip:: + You may limit the usage of GPU memory + by adjusting the `gpu_memory_utilization` parameter. + """ + # Get the maximum number of blocks that can be allocated on GPU and CPU. + num_blocks = self._run_workers( + "profile_num_available_blocks", + block_size=self.cache_config.block_size, + gpu_memory_utilization=self.cache_config.gpu_memory_utilization, + cpu_swap_space=self.cache_config.swap_space_bytes, + cache_dtype=self.cache_config.cache_dtype, + ) + + # Since we use a shared centralized controller, we take the minimum + # number of blocks across all workers to make sure all the memory + # operators can be applied to all workers. 
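# Illustrative sketch, not part of this patch (block counts are made up):
# if two workers report (GPU, CPU) block profiles of (7104, 2048) and
# (7040, 2048), the shared cache must be sized for the smaller worker.
profiled_blocks = [(7104, 2048), (7040, 2048)]
example_num_gpu_blocks = min(b[0] for b in profiled_blocks)  # 7040
example_num_cpu_blocks = min(b[1] for b in profiled_blocks)  # 2048
# With block_size = 16 this caps the KV cache at 7040 * 16 = 112,640 tokens,
# which check_block_size_valid() requires to be at least max_model_len.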
+ num_gpu_blocks = min(b[0] for b in num_blocks) + num_cpu_blocks = min(b[1] for b in num_blocks) + logger.info(f"# GPU blocks: {num_gpu_blocks}, " + f"# CPU blocks: {num_cpu_blocks}") + + check_block_size_valid(num_gpu_blocks, self.cache_config.block_size, + self.model_config.max_model_len) + + self.cache_config.num_gpu_blocks = num_gpu_blocks + self.cache_config.num_cpu_blocks = num_cpu_blocks + + # Initialize the cache. + self._run_workers("init_cache_engine", cache_config=self.cache_config) + # Warm up the model. This includes capturing the model into CUDA graph + # if enforce_eager is False. + self._run_workers("warm_up_model") + + def execute_model(self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]]) -> SamplerOutput: + all_outputs = self._run_workers( + "execute_model", + driver_kwargs={ + "seq_group_metadata_list": seq_group_metadata_list, + "blocks_to_swap_in": blocks_to_swap_in, + "blocks_to_swap_out": blocks_to_swap_out, + "blocks_to_copy": blocks_to_copy, + }, + use_ray_compiled_dag=USE_RAY_COMPILED_DAG) + + # Only the driver worker returns the sampling results. + output = all_outputs[0] + return output + + def add_lora(self, lora_request: LoRARequest) -> bool: + assert lora_request.lora_int_id > 0, "lora_id must be greater than 0." + return self._run_workers( + "add_lora", + lora_request=lora_request, + ) + + def remove_lora(self, lora_id: int) -> bool: + assert lora_id > 0, "lora_id must be greater than 0." + return self._run_workers( + "remove_lora", + lora_id=lora_id, + ) + + def list_loras(self) -> List[int]: + return self._run_workers("list_loras") + + def _run_workers( + self, + method: str, + *args, + driver_args: Optional[List[Any]] = None, + driver_kwargs: Optional[Dict[str, Any]] = None, + max_concurrent_workers: Optional[int] = None, + use_ray_compiled_dag: bool = False, + **kwargs, + ) -> Any: + """Runs the given method on all workers.""" + + if max_concurrent_workers: + raise NotImplementedError( + "max_concurrent_workers is not supported yet.") + + if use_ray_compiled_dag: + # Right now, compiled DAG can only accept a single + # input. TODO(sang): Fix it. + output_channels = self.forward_dag.execute(1) + else: + # Start the ray workers first. + ray_worker_outputs = [ + worker.execute_method.remote(method, *args, **kwargs) + for worker in self.workers + ] + + if driver_args is None: + driver_args = args + if driver_kwargs is None: + driver_kwargs = kwargs + + # Start the driver worker after all the ray workers. + driver_worker_output = getattr(self.driver_worker, + method)(*driver_args, **driver_kwargs) + + # Get the results of the ray workers. + if self.workers: + if use_ray_compiled_dag: + try: + ray_worker_outputs = [ + pickle.loads(chan.begin_read()) + for chan in output_channels + ] + finally: + # Has to call end_read in order to reuse the DAG. 
+ for chan in output_channels: + chan.end_read() + else: + ray_worker_outputs = ray.get(ray_worker_outputs) + + return [driver_worker_output] + ray_worker_outputs + + def _compiled_ray_dag(self): + import pkg_resources + required_version = "2.9" + current_version = pkg_resources.get_distribution("ray").version + if current_version < required_version: + raise ValueError(f"Ray version {required_version} or greater is " + f"required, but found {current_version}") + + from ray.dag import MultiOutputNode, InputNode + assert self.parallel_config.worker_use_ray + + # Right now, compiled DAG requires at least 1 arg. We send + # a dummy value for now. It will be fixed soon. + with InputNode() as input_data: + forward_dag = MultiOutputNode([ + worker.execute_model_compiled_dag_remote.bind(input_data) + for worker in self.workers + ]) + return forward_dag.experimental_compile() + + def check_health(self) -> None: + """Raises an error if engine is unhealthy.""" + self._check_if_any_actor_is_dead() + + def _check_if_any_actor_is_dead(self): + if not self.workers: + return + + dead_actors = [] + for actor in self.workers: + actor_state = ray.state.actors(actor._ray_actor_id.hex()) # pylint: disable=protected-access + if actor_state["State"] == "DEAD": + dead_actors.append(actor) + if dead_actors: + raise RuntimeError("At least one Worker is dead. " + f"Dead Workers: {dead_actors}. ") + + +class RayGPUExecutorAsync(RayGPUExecutor, ExecutorAsyncBase): + + async def _run_workers_async( + self, + method: str, + *args, + driver_args: Optional[List[Any]] = None, + driver_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> Any: + """Runs the given method on all workers.""" + coros = [] + + if driver_args is None: + driver_args = args + if driver_kwargs is None: + driver_kwargs = kwargs + + # Run the driver worker asynchronously. + driver_executor = make_async(getattr(self.driver_worker, method)) + coros.append(driver_executor(*driver_args, **driver_kwargs)) + + # Run the ray workers asynchronously. + for worker in self.workers: + coros.append(worker.execute_method.remote(method, *args, **kwargs)) + + all_outputs = await asyncio.gather(*coros) + return all_outputs + + async def execute_model_async( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]], + ) -> SamplerOutput: + all_outputs = await self._run_workers_async( + "execute_model", + driver_kwargs={ + "seq_group_metadata_list": seq_group_metadata_list, + "blocks_to_swap_in": blocks_to_swap_in, + "blocks_to_swap_out": blocks_to_swap_out, + "blocks_to_copy": blocks_to_copy, + }, + use_ray_compiled_dag=USE_RAY_COMPILED_DAG) + + # Only the driver worker returns the sampling results. + output = all_outputs[0] + return output + + async def check_health_async(self) -> None: + """Raises an error if engine is unhealthy.""" + self._check_if_any_actor_is_dead() diff --git a/vllm/executor/utils.py b/vllm/executor/utils.py new file mode 100644 index 0000000000000..44976696a77c6 --- /dev/null +++ b/vllm/executor/utils.py @@ -0,0 +1,13 @@ +def check_block_size_valid(num_gpu_blocks, block_size, max_model_len) -> None: + if num_gpu_blocks <= 0: + raise ValueError("No available memory for the cache blocks. 
" + "Try increasing `gpu_memory_utilization` when " + "initializing the engine.") + max_seq_len = block_size * num_gpu_blocks + if max_model_len > max_seq_len: + raise ValueError( + f"The model's max seq len ({max_model_len}) " + "is larger than the maximum number of tokens that can be " + f"stored in KV cache ({max_seq_len}). Try increasing " + "`gpu_memory_utilization` or decreasing `max_model_len` when " + "initializing the engine.") From c9415c19d3df26d8ede611abefba35c6837cd934 Mon Sep 17 00:00:00 2001 From: kliuae <17350011+kliuae@users.noreply.github.com> Date: Tue, 12 Mar 2024 04:14:07 +0800 Subject: [PATCH 083/196] [ROCm] Fix warp and lane calculation in blockReduceSum (#3321) --- csrc/reduction_utils.cuh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/csrc/reduction_utils.cuh b/csrc/reduction_utils.cuh index 210bf0b023ab2..c25464e866e55 100644 --- a/csrc/reduction_utils.cuh +++ b/csrc/reduction_utils.cuh @@ -29,12 +29,22 @@ __inline__ __device__ T warpReduceSum(T val) { return val; } +__inline__ __device__ constexpr int _calculateLaneMask(int warp_size) { + return warp_size - 1; +} + +__inline__ __device__ constexpr int _calculateWidShift(int warp_size) { + return 5 + (warp_size >> 6); +} + /* Calculate the sum of all elements in a block */ template __inline__ __device__ T blockReduceSum(T val) { static __shared__ T shared[WARP_SIZE]; - int lane = threadIdx.x & 0x1f; - int wid = threadIdx.x >> 5; + constexpr auto LANE_MASK = _calculateLaneMask(WARP_SIZE); + constexpr auto WID_SHIFT = _calculateWidShift(WARP_SIZE); + int lane = threadIdx.x & LANE_MASK; + int wid = threadIdx.x >> WID_SHIFT; val = warpReduceSum(val); From 654865e21df8ac6fe95de926625306e5756c2c0d Mon Sep 17 00:00:00 2001 From: DAIZHENWEI <32122197+DAIZHENWEI@users.noreply.github.com> Date: Mon, 11 Mar 2024 13:19:51 -0700 Subject: [PATCH 084/196] Support Mistral Model Inference with transformers-neuronx (#3153) --- examples/offline_inference_neuron.py | 10 ++- vllm/model_executor/models/__init__.py | 7 +- vllm/model_executor/models/neuron/mistral.py | 82 ++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) mode change 100644 => 100755 examples/offline_inference_neuron.py mode change 100644 => 100755 vllm/model_executor/models/__init__.py create mode 100755 vllm/model_executor/models/neuron/mistral.py diff --git a/examples/offline_inference_neuron.py b/examples/offline_inference_neuron.py old mode 100644 new mode 100755 index 9b9dc4d94892f..da8874abd92a2 --- a/examples/offline_inference_neuron.py +++ b/examples/offline_inference_neuron.py @@ -14,14 +14,16 @@ llm = LLM( model="openlm-research/open_llama_3b", max_num_seqs=8, - # The max_model_len and block_size arguments are required to be same as max sequence length, - # when targeting neuron device. Currently, this is a known limitation in continuous batching - # support in transformers-neuronx. + # The max_model_len and block_size arguments are required to be same as + # max sequence length when targeting neuron device. + # Currently, this is a known limitation in continuous batching support + # in transformers-neuronx. # TODO(liangfu): Support paged-attention in transformers-neuronx. max_model_len=128, block_size=128, # The device can be automatically detected when AWS Neuron SDK is installed. - # The device argument can be either unspecified for automated detection, or explicitly assigned. + # The device argument can be either unspecified for automated detection, + # or explicitly assigned. 
device="neuron") # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py old mode 100644 new mode 100755 index 75c2ae1e9f48e..bc3b6a582d53d --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -62,8 +62,11 @@ "Sliding window attention is not yet supported in ROCm's flash attention", } -# Models not supported by Neuron. -_NEURON_SUPPORTED_MODELS = {"LlamaForCausalLM": "neuron.llama"} +# Models supported by Neuron. +_NEURON_SUPPORTED_MODELS = { + "LlamaForCausalLM": "neuron.llama", + "MistralForCausalLM": "neuron.mistral" +} class ModelRegistry: diff --git a/vllm/model_executor/models/neuron/mistral.py b/vllm/model_executor/models/neuron/mistral.py new file mode 100755 index 0000000000000..a302cce30abab --- /dev/null +++ b/vllm/model_executor/models/neuron/mistral.py @@ -0,0 +1,82 @@ +"""Inference-only Mistral model compatible with HuggingFace weights.""" +from typing import List, Optional, Tuple + +import torch +from torch import nn +from transformers import MistralConfig + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.sequence import SamplerOutput +import os + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class MistralForCausalLM(nn.Module): + + def __init__( + self, + config: MistralConfig, + linear_method=None, + ) -> None: + super().__init__() + self.config = config + self.linear_method = linear_method + self.model = None + self.lm_head = None + self.sampler = Sampler(config.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> SamplerOutput: + with torch.inference_mode(): + seq_ids = [] + block_size = self.model.context_buckets[-1] + if input_metadata.is_prompt: + seq_ids = input_metadata.slot_mapping[:, 0] // block_size + else: + seq_ids = input_metadata.block_tables + + logits = self.model(input_ids, + cache_ids=positions, + start_ids=seq_ids) + return logits + + def sample( + self, + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(self.model.chkpt_model.lm_head, + hidden_states, sampling_metadata) + return next_tokens + + def load_weights(self, + model_name_or_path: str, + cache_dir: Optional[str] = None, + load_format: str = "auto", + revision: Optional[str] = None, + **kwargs): + from transformers_neuronx.mistral.model import MistralForSampling + + split_model_dir = f"{model_name_or_path}-split" + if os.path.isdir(os.path.join(model_name_or_path, + "pytorch_model.bin")): + split_model_dir = model_name_or_path + elif not os.path.exists(f"{model_name_or_path}-split"): + from transformers import MistralForCausalLM + from transformers_neuronx.module import save_pretrained_split + + hf_model = MistralForCausalLM.from_pretrained( + model_name_or_path, low_cpu_mem_usage=True) + save_pretrained_split(hf_model, f"{model_name_or_path}-split") + + self.model = MistralForSampling.from_pretrained( + split_model_dir, **kwargs) + self.model.to_neuron() From b0925b38789bb3b20dcc39e229fcfe12a311e487 Mon Sep 17 00:00:00 2001 From: Sherlock Xu <65327072+Sherlock113@users.noreply.github.com> Date: Wed, 13 Mar 2024 01:34:30 +0800 Subject: [PATCH 
085/196] docs: Add BentoML deployment doc (#3336) Signed-off-by: Sherlock113 --- docs/source/index.rst | 1 + docs/source/serving/deploying_with_bentoml.rst | 8 ++++++++ 2 files changed, 9 insertions(+) create mode 100644 docs/source/serving/deploying_with_bentoml.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index c0250bf99f7ae..65bfbbabf8be1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -73,6 +73,7 @@ Documentation serving/run_on_sky serving/deploying_with_kserve serving/deploying_with_triton + serving/deploying_with_bentoml serving/deploying_with_docker serving/serving_with_langchain serving/metrics diff --git a/docs/source/serving/deploying_with_bentoml.rst b/docs/source/serving/deploying_with_bentoml.rst new file mode 100644 index 0000000000000..4b9d19f5bdb72 --- /dev/null +++ b/docs/source/serving/deploying_with_bentoml.rst @@ -0,0 +1,8 @@ +.. _deploying_with_bentoml: + +Deploying with BentoML +====================== + +`BentoML `_ allows you to deploy a large language model (LLM) server with vLLM as the backend, which exposes OpenAI-compatible endpoints. You can serve the model locally or containerize it as an OCI-complicant image and deploy it on Kubernetes. + +For details, see the tutorial `vLLM inference in the BentoML documentation `_. \ No newline at end of file From 49a3c8662ba745503890ab8b3c502aad7e1a0a19 Mon Sep 17 00:00:00 2001 From: Breno Faria Date: Wed, 13 Mar 2024 01:30:08 +0100 Subject: [PATCH 086/196] Fixes #1556 double free (#3347) --- tests/core/test_block_manager.py | 87 ++++++++++++++++++++++++++++++++ vllm/core/block_manager.py | 17 ++++++- 2 files changed, 102 insertions(+), 2 deletions(-) diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py index b280fd1d73c2f..44ac05a1430b3 100644 --- a/tests/core/test_block_manager.py +++ b/tests/core/test_block_manager.py @@ -274,3 +274,90 @@ def test_reset(): # Resetting block manager frees all allocated blocks. block_manager.reset() assert block_manager.get_num_free_gpu_blocks() == original_blocks + + +def test_sliding_window_multi_seq(): + """ + Tests that memory allocation and deallocation is handled + correctly with multiple sequences that exceed the sliding + window's capacity. + """ + block_size = 1 + num_cpu_blocks = 8 + num_gpu_blocks = 8 + sliding_window = 2 + block_manager = BlockSpaceManager(block_size, + num_cpu_blocks, + num_gpu_blocks, + sliding_window=sliding_window, + watermark=0) + + assert block_manager.get_num_free_gpu_blocks() == num_gpu_blocks + + parent = Sequence(1, "one two three", [0, 1, 2], block_size) + seq_group = SequenceGroup("1", [parent], SamplingParams(), time.time(), + None) + block_manager.allocate(seq_group) + + # assert the number of blocks allocated is correct + # the parent seq has len 3, but since sliding_window is 2, + # we will use at most 2 blocks + assert block_manager.get_num_free_gpu_blocks( + ) == num_gpu_blocks - sliding_window + + # Fork prompt and copy block tables. + child = parent.fork(2) + block_manager.fork(parent, child) + + # assert the number of blocks allocated is correct + # forking does not increase memory consumption + assert block_manager.get_num_free_gpu_blocks( + ) == num_gpu_blocks - sliding_window + + # assert both parent and child share all blocks + assert block_manager.get_block_table( + parent) == block_manager.get_block_table(child) + + token_id = 4 + # Append token to child. Block is shared so copy on write occurs. 
+ child.append_token_id(token_id, {token_id: Logprob(0.0)}) + block_manager.append_slot(child) + + # assert the number of blocks allocated is correct + # we will use now one block more. Each seq will use 2 blocks, + # but only one can be shared + assert block_manager.get_num_free_gpu_blocks( + ) == num_gpu_blocks - sliding_window - 1 + + token_id = 5 + parent.append_token_id(token_id, {token_id: Logprob(0.0)}) + block_manager.append_slot(parent) + + # assert the number of blocks allocated is correct + # no change, because both sequences are still just sharing one block + assert block_manager.get_num_free_gpu_blocks( + ) == num_gpu_blocks - sliding_window - 1 + + block_table_parent = block_manager.get_block_table(parent) + block_table_child = block_manager.get_block_table(child) + + assert block_table_parent != block_table_child + + # assert both blocks are sharing the second-last block + assert block_table_parent[-2] == block_table_child[-2] + + # now let's clean up... + block_manager.free(parent) + + # assert the number of blocks allocated is correct + # We have freed one seq, reducing the ref count of two blocks by one. + # One of the two was only used by the parent seq, so this is now free. + # The child seq still consumes sliding_window blocks + assert block_manager.get_num_free_gpu_blocks( + ) == num_gpu_blocks - sliding_window + + # free all blocks + block_manager.free(child) + + # assert all blocks are free now + assert block_manager.get_num_free_gpu_blocks() == num_gpu_blocks diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 8bfc14999f0a7..8b089a5650f48 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -312,7 +312,12 @@ def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None: # Thus, it is always safe from OOM. src_block_table = self.block_tables[parent_seq.seq_id] self.block_tables[child_seq.seq_id] = src_block_table.copy() - for block in src_block_table: + # When using a sliding window, blocks will be eventually reused. + # In this case the block tables will contain repeated blocks. + # When forking, we must make sure that each block's `ref_count` + # is only incremented by one, so we deduplicate them by wrapping + # them in a set. + for block in set(src_block_table): block.ref_count += 1 def _get_physical_blocks( @@ -393,7 +398,15 @@ def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]: return block_number_mapping def _free_block_table(self, block_table: BlockTable) -> None: - for block in set(block_table): + # when using a sliding window, each seq will only use up + # to `self.block_sliding_window` blocks. When freeing + # the block table, we must make sure to not free blocks more + # than once. If no sliding window is used, there is no block + # reuse in the block table, so we must free all blocks. 
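# Illustrative sketch, not part of this patch (a toy block class stands in
# for the real allocator's blocks): with block_size=1 and a sliding window
# of 2, the table cycles over two physical blocks, and the last
# `block_sliding_window` entries cover the blocks still in use, so wrapping
# them in a set() releases each physical block exactly once.
class _ToyBlock:
    def __init__(self) -> None:
        self.ref_count = 1

block_a, block_b = _ToyBlock(), _ToyBlock()
toy_block_table = [block_a, block_b, block_a, block_b, block_a]
for blk in set(toy_block_table[-2:]):  # {block_a, block_b}
    blk.ref_count -= 1                 # decremented once each, no double free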
+ blocks_to_free = (block_table[-self.block_sliding_window:] + if self.block_sliding_window is not None else + block_table) + for block in set(blocks_to_free): if block.device == Device.GPU: self.gpu_allocator.free(block) else: From 602358f8a86ef9fc0ba882e083e19b44e00b9302 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Tue, 12 Mar 2024 22:06:17 -0700 Subject: [PATCH 087/196] Add kernel for GeGLU with approximate GELU (#3337) --- csrc/activation_kernels.cu | 22 +++++++++++++++++++++- csrc/ops.h | 4 ++++ csrc/pybind.cpp | 6 +++++- tests/kernels/test_activation.py | 11 ++++++++--- vllm/model_executor/layers/activation.py | 13 +++++++++++-- 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/csrc/activation_kernels.cu b/csrc/activation_kernels.cu index 22b10f0571d1c..24d972702c858 100644 --- a/csrc/activation_kernels.cu +++ b/csrc/activation_kernels.cu @@ -33,12 +33,25 @@ template __device__ __forceinline__ T gelu_kernel(const T& x) { // Equivalent to PyTorch GELU with 'none' approximation. // Refer to: - // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L38 + // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L36-L38 const float f = (float) x; constexpr float ALPHA = M_SQRT1_2; return (T) (f * 0.5f * (1.0f + ::erf(f * ALPHA))); } +template +__device__ __forceinline__ T gelu_tanh_kernel(const T& x) { + // Equivalent to PyTorch GELU with 'tanh' approximation. + // Refer to: + // https://github.com/pytorch/pytorch/blob/8ac9b20d4b090c213799e81acf48a55ea8d437d6/aten/src/ATen/native/cuda/ActivationGeluKernel.cu#L25-L30 + const float f = (float) x; + constexpr float BETA = M_SQRT2 * M_2_SQRTPI * 0.5f; + constexpr float KAPPA = 0.044715; + float x_cube = f * f * f; + float inner = BETA * (f + KAPPA * x_cube); + return (T) (0.5f * f * (1.0f + ::tanhf(inner))); +} + } // namespace vllm // Launch activation and gating kernel. @@ -73,6 +86,13 @@ void gelu_and_mul( LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_kernel); } +void gelu_tanh_and_mul( + torch::Tensor& out, // [..., d] + torch::Tensor& input) // [..., 2 * d] +{ + LAUNCH_ACTIVATION_GATE_KERNEL(vllm::gelu_tanh_kernel); +} + namespace vllm { // Element-wise activation kernel template. 
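For reference, the math implemented by gelu_tanh_kernel above can be sketched in a few lines of PyTorch. This is an illustrative reference only, not part of the patch; the helper name is made up, and it is intended to match F.gelu(x, approximate="tanh") applied to the gated first half of the input.

import math
import torch

def gelu_tanh_and_mul_reference(x: torch.Tensor) -> torch.Tensor:
    # Split the last dimension: gate the first half by its tanh-approximate
    # GELU and multiply by the second half, as the fused CUDA kernel does.
    d = x.shape[-1] // 2
    a, b = x[..., :d], x[..., d:]
    beta = math.sqrt(2.0 / math.pi)  # M_SQRT2 * M_2_SQRTPI * 0.5
    kappa = 0.044715
    inner = beta * (a + kappa * a.pow(3))
    return 0.5 * a * (1.0 + torch.tanh(inner)) * b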
diff --git a/csrc/ops.h b/csrc/ops.h index 249c7451bf73c..53222972abb70 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -61,6 +61,10 @@ void gelu_and_mul( torch::Tensor& out, torch::Tensor& input); +void gelu_tanh_and_mul( + torch::Tensor& out, + torch::Tensor& input); + void gelu_new( torch::Tensor& out, torch::Tensor& input); diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 4b6ade7566398..39384f08d928c 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -25,7 +25,11 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ops.def( "gelu_and_mul", &gelu_and_mul, - "Activation function used in GeGLU."); + "Activation function used in GeGLU with `none` approximation."); + ops.def( + "gelu_tanh_and_mul", + &gelu_tanh_and_mul, + "Activation function used in GeGLU with `tanh` approximation."); ops.def( "gelu_new", &gelu_new, diff --git a/tests/kernels/test_activation.py b/tests/kernels/test_activation.py index e0dec144eba11..f78913f120aa4 100644 --- a/tests/kernels/test_activation.py +++ b/tests/kernels/test_activation.py @@ -16,7 +16,7 @@ ] -@pytest.mark.parametrize("activation", [SiluAndMul, GeluAndMul]) +@pytest.mark.parametrize("activation", ["silu", "gelu", "gelu_tanh"]) @pytest.mark.parametrize("num_tokens", NUM_TOKENS) @pytest.mark.parametrize("d", D) @pytest.mark.parametrize("dtype", DTYPES) @@ -24,7 +24,7 @@ @pytest.mark.parametrize("device", CUDA_DEVICES) @torch.inference_mode() def test_act_and_mul( - activation: Type[torch.nn.Module], + activation: str, num_tokens: int, d: int, dtype: torch.dtype, @@ -36,7 +36,12 @@ def test_act_and_mul( torch.cuda.manual_seed(seed) torch.set_default_device(device) x = torch.randn(num_tokens, 2 * d, dtype=dtype) - layer = activation() + if activation == "silu": + layer = SiluAndMul() + elif activation == "gelu": + layer = GeluAndMul(approximate="none") + elif activation == "gelu_tanh": + layer = GeluAndMul(approximate="tanh") out = layer(x) ref_out = layer._forward(x) # The SiLU and GELU implementations are equivalent to the native PyTorch diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py index 5a3a7b2dbaee7..3eb73ee109f50 100644 --- a/vllm/model_executor/layers/activation.py +++ b/vllm/model_executor/layers/activation.py @@ -47,16 +47,25 @@ class GeluAndMul(nn.Module): return: (batch_size, seq_len, d) or (num_tokens, d) """ + def __init__(self, approximate: str = "none"): + super().__init__() + self.approximate = approximate + if approximate not in ("none", "tanh"): + raise ValueError(f"Unknown approximate mode: {approximate}") + def _forward(self, x: torch.Tensor) -> torch.Tensor: """PyTorch-native implementation equivalent to forward().""" d = x.shape[-1] // 2 - return F.gelu(x[..., :d]) * x[..., d:] + return F.gelu(x[..., :d], approximate=self.approximate) * x[..., d:] def forward(self, x: torch.Tensor) -> torch.Tensor: d = x.shape[-1] // 2 output_shape = (x.shape[:-1] + (d, )) out = torch.empty(output_shape, dtype=x.dtype, device=x.device) - ops.gelu_and_mul(out, x) + if self.approximate == "none": + ops.gelu_and_mul(out, x) + elif self.approximate == "tanh": + ops.gelu_tanh_and_mul(out, x) return out From b167109ba12f18d028d2be8a61d3dce950eb2724 Mon Sep 17 00:00:00 2001 From: Bo-Wen Wang <1849994161@qq.com> Date: Wed, 13 Mar 2024 13:51:42 +0800 Subject: [PATCH 088/196] [Fix] Fix quantization="gptq" when using Marlin (#3319) Co-authored-by: Woosuk Kwon --- vllm/config.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index 
d2b68b6fa1fe2..319c1569f5e98 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -168,13 +168,18 @@ def _verify_quantization(self) -> None: # Parse quantization method from the HF model config, if available. hf_quant_config = getattr(self.hf_config, "quantization_config", None) if hf_quant_config is not None: - hf_quant_method = str(hf_quant_config["quant_method"]).lower() + # If the GPTQ model is serialized in marlin format, use marlin. if (hf_quant_method == "gptq" and "is_marlin_format" in hf_quant_config and hf_quant_config["is_marlin_format"]): + logger.info("The model is serialized in Marlin format. " + "Using Marlin kernel.") hf_quant_method = "marlin" + if self.quantization == "gptq": + self.quantization = hf_quant_method + if self.quantization is None: self.quantization = hf_quant_method elif self.quantization != hf_quant_method: From e221910e77087743a50560e4ae69c3c2a12beb53 Mon Sep 17 00:00:00 2001 From: Ronan McGovern <78278410+RonanKMcGovern@users.noreply.github.com> Date: Wed, 13 Mar 2024 06:33:43 +0000 Subject: [PATCH 089/196] add hf_transfer to requirements.txt (#3031) --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index dd4867702d3de..18770f994ebd2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -96,7 +96,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/pip \ - pip install accelerate + pip install accelerate hf_transfer COPY --from=build /workspace/vllm/*.so /workspace/vllm/ COPY vllm vllm From ba8dc958a3d8533a6e5b7debda47e4d42a062b78 Mon Sep 17 00:00:00 2001 From: Hui Liu <96135754+hliuca@users.noreply.github.com> Date: Wed, 13 Mar 2024 09:16:55 -0700 Subject: [PATCH 090/196] [Minor] Fix bias in if to remove ambiguity (#3259) --- vllm/model_executor/layers/linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 60f6fc83b200f..40e681df48f86 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -73,7 +73,7 @@ def apply_weights(self, bias: Optional[torch.Tensor] = None) -> torch.Tensor: weight = weights["weight"] if self.separate_bias_add: - if bias: + if bias is not None: return F.linear(x, weight) + bias return F.linear(x, weight) return F.linear(x, weight, bias) From 739c350c1926682f435316294491aa54661849b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=BA=8F?= Date: Thu, 14 Mar 2024 00:43:24 +0800 Subject: [PATCH 091/196] [Minor Fix] Use cupy-cuda11x in CUDA 11.8 build (#3256) --- setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.py b/setup.py index 023c3cde1910c..accf6bb400310 100644 --- a/setup.py +++ b/setup.py @@ -431,6 +431,12 @@ def get_requirements() -> List[str]: else: with open(get_path("requirements.txt")) as f: requirements = f.read().strip().split("\n") + if nvcc_cuda_version <= Version("11.8"): + # replace cupy-cuda12x with cupy-cuda11x for cuda 11.x + for i in range(len(requirements)): + if requirements[i].startswith("cupy-cuda12x"): + requirements[i] = "cupy-cuda11x" + break return requirements From ae0ccb40170d140ded8de99fc905fd8cb0bd409c Mon Sep 17 00:00:00 2001 From: Or Sharir Date: Wed, 13 Mar 2024 21:18:25 +0200 Subject: [PATCH 092/196] Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when using Multi-LoRA. 
(#3350) --- csrc/punica/bgmv/bgmv_config.h | 1 + tests/lora/test_punica.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/csrc/punica/bgmv/bgmv_config.h b/csrc/punica/bgmv/bgmv_config.h index 4dc90de1ab42a..a7415dfc91369 100644 --- a/csrc/punica/bgmv/bgmv_config.h +++ b/csrc/punica/bgmv/bgmv_config.h @@ -43,6 +43,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X, f(in_T, out_T, W_T, narrow, 14336) \ f(in_T, out_T, W_T, narrow, 16384) \ f(in_T, out_T, W_T, narrow, 20480) \ + f(in_T, out_T, W_T, narrow, 22016) \ f(in_T, out_T, W_T, narrow, 24576) \ f(in_T, out_T, W_T, narrow, 28672) \ f(in_T, out_T, W_T, narrow, 32000) \ diff --git a/tests/lora/test_punica.py b/tests/lora/test_punica.py index cbe0f6fa2e851..fd707766c6a30 100644 --- a/tests/lora/test_punica.py +++ b/tests/lora/test_punica.py @@ -45,7 +45,7 @@ def _lora_ref_impl( H1 = H2 = [ 128, 256, 512, 1024, 1280, 2048, 2560, 2752, 3072, 3456, 3584, 4096, 5120, 5504, 5632, 6144, 6912, 7168, 8192, 9216, 10240, 11008, 13824, 14336, - 24576, 32000, 32256, 32512, 32768, 33024 + 22016, 24576, 32000, 32256, 32512, 32768, 33024 ] SEED = [0xabcdabcd987] From 7e9bd08f60a4b18e3646ff986caeacde9ffffa53 Mon Sep 17 00:00:00 2001 From: Terry <149540247+tterrysun@users.noreply.github.com> Date: Wed, 13 Mar 2024 13:45:26 -0700 Subject: [PATCH 093/196] Add batched RoPE kernel (#3095) --- benchmarks/kernels/benchmark_rope.py | 120 ++++++++++++++++ csrc/ops.h | 10 ++ csrc/pos_encoding_kernels.cu | 126 ++++++++++++++-- csrc/pybind.cpp | 5 + tests/kernels/test_pos_encoding.py | 135 +++++++++++++++++- .../model_executor/layers/rotary_embedding.py | 58 +++++--- 6 files changed, 417 insertions(+), 37 deletions(-) create mode 100644 benchmarks/kernels/benchmark_rope.py diff --git a/benchmarks/kernels/benchmark_rope.py b/benchmarks/kernels/benchmark_rope.py new file mode 100644 index 0000000000000..f9564dd9588f0 --- /dev/null +++ b/benchmarks/kernels/benchmark_rope.py @@ -0,0 +1,120 @@ +from typing import Optional + +import argparse +import torch +import nvtx +from itertools import accumulate +from vllm.model_executor.layers.rotary_embedding import get_rope + + +def benchmark_rope_kernels_multi_lora( + is_neox_style: bool, + batch_size: int, + seq_len: int, + num_heads: int, + head_size: int, + rotary_dim: Optional[int], + dtype: torch.dtype, + seed: int, + device: str, + max_position: int = 8192, + base: int = 10000, +) -> None: + torch.random.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.set_default_device(device) + if rotary_dim is None: + rotary_dim = head_size + # silulating serving 4 LoRAs + scaling_factors = [1, 2, 4, 8] + # batched RoPE can take multiple scaling factors + batched_rope = get_rope(head_size, rotary_dim, max_position, base, + is_neox_style, { + "type": "linear", + "factor": tuple(scaling_factors) + }) + # non-batched RoPE takes only one scaling factor, we create multiple + # instances to simulate the same behavior + non_batched_ropes = [] + for scaling_factor in scaling_factors: + non_batched_ropes.append( + get_rope(head_size, rotary_dim, max_position, base, is_neox_style, + { + "type": "linear", + "factor": (scaling_factor, ) + })) + + positions = torch.randint(0, max_position, (batch_size, seq_len)) + query = torch.randn(batch_size, + seq_len, + num_heads * head_size, + dtype=dtype) + key = torch.randn_like(query) + + # create query offsets for batched RoPE, we concat multiple kv cache + # together and each query needs to find the right kv cache of its type 
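The comment above is the crux of the multi-LoRA benchmark: several linearly scaled cos/sin caches are packed back to back, and every token carries an offset that points the kernel at the segment for its scaling factor. A standalone sketch of that offset arithmetic (illustrative only; it simply re-evaluates the accumulate(...) expression from the benchmark with its default batch size 16 and sequence length 512):

    from itertools import accumulate

    import torch

    max_position = 8192
    scaling_factors = [1, 2, 4, 8]

    # Start offset of each scaling factor's segment in the packed cache,
    # mirroring the accumulate(...) expression used above.
    offset_map = torch.tensor(
        list(
            accumulate([0] + [
                max_position * factor * 2 for factor in scaling_factors[:-1]
            ])))
    print(offset_map)  # tensor([0, 16384, 49152, 114688])

    # Each (batch, seq) position is assigned one of the scaling factors;
    # the flattened per-token offsets are what batched_rotary_embedding takes.
    query_types = torch.randint(0, len(scaling_factors), (16, 512))
    flatten_offsets = offset_map[query_types].flatten()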
+ offset_map = torch.tensor( + list( + accumulate([0] + [ + max_position * scaling_factor * 2 + for scaling_factor in scaling_factors[:-1] + ]))) + query_types = torch.randint(0, + len(scaling_factors), (batch_size, seq_len), + device=device) + # map query types to offsets + query_offsets = offset_map[query_types] + # the kernel takes flattened offsets + flatten_offsets = query_offsets.flatten() + + # batched queries of the same type together for non-batched RoPE + queries = [query[query_types == i] for i in range(len(scaling_factors))] + keys = [key[query_types == i] for i in range(len(scaling_factors))] + packed_qkr = zip(queries, keys, non_batched_ropes) + # synchronize before start timing + torch.cuda.synchronize() + with nvtx.annotate("non-batched", color="yellow"): + for q, k, r in packed_qkr: + r.forward(positions, q, k) + torch.cuda.synchronize() + with nvtx.annotate("batched", color="green"): + batched_rope.forward(positions, query, key, flatten_offsets) + torch.cuda.synchronize() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Benchmark the rotary embedding kernels.") + parser.add_argument("--is-neox-style", type=bool, default=True) + parser.add_argument("--batch-size", type=int, default=16) + parser.add_argument("--seq-len", type=int, default=512) + parser.add_argument("--num-heads", type=int, default=8) + parser.add_argument("--head-size", + type=int, + choices=[64, 80, 96, 112, 128, 256], + default=128) + parser.add_argument("--rotary-dim", type=int, choices=[16, 32], default=32) + parser.add_argument("--dtype", + type=str, + choices=["bfloat16", "float"], + default="float") + parser.add_argument("--seed", type=int, default=0) + parser.add_argument("--device", + type=str, + choices=["cuda:0", "cuda:1"], + default="cuda:0") + args = parser.parse_args() + print(args) + + benchmark_rope_kernels_multi_lora( + is_neox_style=args.is_neox_style, + batch_size=args.batch_size, + seq_len=args.seq_len, + num_heads=args.num_heads, + head_size=args.head_size, + rotary_dim=args.rotary_dim, + dtype=getattr(torch, args.dtype), + seed=args.seed, + device=args.device, + ) diff --git a/csrc/ops.h b/csrc/ops.h index 53222972abb70..d5d6e240da7c4 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -53,6 +53,16 @@ void rotary_embedding( torch::Tensor& cos_sin_cache, bool is_neox); +void batched_rotary_embedding( + torch::Tensor& positions, + torch::Tensor& query, + torch::Tensor& key, + int head_size, + torch::Tensor& cos_sin_cache, + bool is_neox, + int rot_dim, + torch::Tensor& cos_sin_cache_offsets); + void silu_and_mul( torch::Tensor& out, torch::Tensor& input); diff --git a/csrc/pos_encoding_kernels.cu b/csrc/pos_encoding_kernels.cu index 5f522795619e1..d80cb6973fad6 100644 --- a/csrc/pos_encoding_kernels.cu +++ b/csrc/pos_encoding_kernels.cu @@ -8,7 +8,7 @@ namespace vllm { template -inline __device__ void apply_rotary_embedding( +inline __device__ void apply_token_rotary_embedding( scalar_t* __restrict__ arr, const scalar_t* __restrict__ cos_ptr, const scalar_t* __restrict__ sin_ptr, @@ -38,22 +38,18 @@ inline __device__ void apply_rotary_embedding( } template -__global__ void rotary_embedding_kernel( - const int64_t* __restrict__ positions, // [batch_size, seq_len] or [num_tokens] +inline __device__ void apply_rotary_embedding( scalar_t* __restrict__ query, // [batch_size, seq_len, num_heads, head_size] or [num_tokens, num_heads, head_size] scalar_t* __restrict__ key, // [batch_size, seq_len, num_kv_heads, head_size] or [num_tokens, num_kv_heads, head_size] - const 
scalar_t* __restrict__ cos_sin_cache, // [max_position, 2, rot_dim // 2] - const int rot_dim, - const int64_t query_stride, - const int64_t key_stride, + const scalar_t* cache_ptr, + const int head_size, const int num_heads, const int num_kv_heads, - const int head_size) { - // Each thread block is responsible for one token. - const int token_idx = blockIdx.x; - int64_t pos = positions[token_idx]; - const scalar_t* cache_ptr = cos_sin_cache + pos * rot_dim; - + const int rot_dim, + const int token_idx, + const int64_t query_stride, + const int64_t key_stride) +{ const int embed_dim = rot_dim / 2; const scalar_t* cos_ptr = cache_ptr; const scalar_t* sin_ptr = cache_ptr + embed_dim; @@ -63,7 +59,7 @@ __global__ void rotary_embedding_kernel( const int head_idx = i / embed_dim; const int64_t token_head = token_idx * query_stride + head_idx * head_size; const int rot_offset = i % embed_dim; - apply_rotary_embedding(query + token_head, cos_ptr, + apply_token_rotary_embedding(query + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim); } @@ -72,11 +68,53 @@ __global__ void rotary_embedding_kernel( const int head_idx = i / embed_dim; const int64_t token_head = token_idx * key_stride + head_idx * head_size; const int rot_offset = i % embed_dim; - apply_rotary_embedding(key + token_head, cos_ptr, + apply_token_rotary_embedding(key + token_head, cos_ptr, sin_ptr, rot_offset, embed_dim); } } +template +__global__ void rotary_embedding_kernel( + const int64_t* __restrict__ positions, // [batch_size, seq_len] or [num_tokens] + scalar_t* __restrict__ query, // [batch_size, seq_len, num_heads, head_size] or [num_tokens, num_heads, head_size] + scalar_t* __restrict__ key, // [batch_size, seq_len, num_kv_heads, head_size] or [num_tokens, num_kv_heads, head_size] + const scalar_t* __restrict__ cos_sin_cache, // [max_position, 2, rot_dim // 2] + const int rot_dim, + const int64_t query_stride, + const int64_t key_stride, + const int num_heads, + const int num_kv_heads, + const int head_size) { + // Each thread block is responsible for one token. + const int token_idx = blockIdx.x; + int64_t pos = positions[token_idx]; + const scalar_t* cache_ptr = cos_sin_cache + pos * rot_dim; + + apply_rotary_embedding(query, key, cache_ptr, head_size, num_heads, num_kv_heads, rot_dim, token_idx, query_stride, key_stride); +} + +template +__global__ void batched_rotary_embedding_kernel( + const int64_t* __restrict__ positions, // [batch_size, seq_len] or [num_tokens] + scalar_t* __restrict__ query, // [batch_size, seq_len, num_heads, head_size] or [num_tokens, num_heads, head_size] + scalar_t* __restrict__ key, // [batch_size, seq_len, num_kv_heads, head_size] or [num_tokens, num_kv_heads, head_size] + const scalar_t* __restrict__ cos_sin_cache, // [max_position, 2, rot_dim // 2] + const int64_t* __restrict__ cos_sin_cache_offsets, // [batch_size, seq_len] or [num_tokens] + const int rot_dim, + const int64_t query_stride, + const int64_t key_stride, + const int num_heads, + const int num_kv_heads, + const int head_size) { + // Each thread block is responsible for one token. 
+ const int token_idx = blockIdx.x; + int64_t pos = positions[token_idx]; + int64_t cos_sin_cache_offset = cos_sin_cache_offsets[token_idx]; + const scalar_t* cache_ptr = cos_sin_cache + (cos_sin_cache_offset + pos) * rot_dim; + + apply_rotary_embedding(query, key, cache_ptr, head_size, num_heads, num_kv_heads, rot_dim, token_idx, query_stride, key_stride); +} + } // namespace vllm void rotary_embedding( @@ -128,3 +166,61 @@ void rotary_embedding( } }); } + +/* +Batched version of rotary embedding, pack multiple LoRAs together +and process in batched manner. +*/ +void batched_rotary_embedding( + torch::Tensor& positions, // [batch_size, seq_len] or [num_tokens] + torch::Tensor& query, // [batch_size, seq_len, num_heads * head_size] or [num_tokens, num_heads * head_size] + torch::Tensor& key, // [batch_size, seq_len, num_kv_heads * head_size] or [num_tokens, num_kv_heads * head_size] + int head_size, + torch::Tensor& cos_sin_cache, // [max_position, rot_dim] + bool is_neox, + int rot_dim, + torch::Tensor& cos_sin_cache_offsets // [num_tokens] +) { + int64_t num_tokens = cos_sin_cache_offsets.size(0); + int num_heads = query.size(-1) / head_size; + int num_kv_heads = key.size(-1) / head_size; + int64_t query_stride = query.stride(-2); + int64_t key_stride = key.stride(-2); + + dim3 grid(num_tokens); + dim3 block(std::min(num_heads * rot_dim / 2, 512)); + const at::cuda::OptionalCUDAGuard device_guard(device_of(query)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + VLLM_DISPATCH_FLOATING_TYPES( + query.scalar_type(), + "rotary_embedding", + [&] { + if (is_neox) { + vllm::batched_rotary_embedding_kernel<<>>( + positions.data_ptr(), + query.data_ptr(), + key.data_ptr(), + cos_sin_cache.data_ptr(), + cos_sin_cache_offsets.data_ptr(), + rot_dim, + query_stride, + key_stride, + num_heads, + num_kv_heads, + head_size); + } else { + vllm::batched_rotary_embedding_kernel<<>>( + positions.data_ptr(), + query.data_ptr(), + key.data_ptr(), + cos_sin_cache.data_ptr(), + cos_sin_cache_offsets.data_ptr(), + rot_dim, + query_stride, + key_stride, + num_heads, + num_kv_heads, + head_size); + } + }); +} diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 39384f08d928c..a5c6439fd6909 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -56,6 +56,11 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { &rotary_embedding, "Apply GPT-NeoX or GPT-J style rotary embedding to query and key"); + ops.def( + "batched_rotary_embedding", + &batched_rotary_embedding, + "Apply GPT-NeoX or GPT-J style rotary embedding to query and key (supports multiple loras)"); + // Quantization ops #ifndef USE_ROCM ops.def("awq_gemm", &awq_gemm, "Quantized GEMM for AWQ"); diff --git a/tests/kernels/test_pos_encoding.py b/tests/kernels/test_pos_encoding.py index 0d27bbaff9fc5..ffdcc1e8c80fd 100644 --- a/tests/kernels/test_pos_encoding.py +++ b/tests/kernels/test_pos_encoding.py @@ -1,8 +1,9 @@ -from typing import Optional +from typing import List, Optional import pytest import torch from allclose_default import get_default_atol, get_default_rtol +from itertools import accumulate from vllm.model_executor.layers.rotary_embedding import get_rope IS_NEOX_STYLE = [True, False] @@ -72,3 +73,135 @@ def test_rotary_embedding( ref_key, atol=get_default_atol(out_key), rtol=get_default_rtol(out_key)) + + +@pytest.mark.parametrize("is_neox_style", IS_NEOX_STYLE) +@pytest.mark.parametrize("batch_size", BATCH_SIZES) +@pytest.mark.parametrize("seq_len", SEQ_LENS) +@pytest.mark.parametrize("num_heads", NUM_HEADS) 
+@pytest.mark.parametrize("head_size", HEAD_SIZES) +@pytest.mark.parametrize("rotary_dim", ROTARY_DIMS) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +@torch.inference_mode() +def test_batched_rotary_embedding( + is_neox_style: bool, + batch_size: int, + seq_len: int, + num_heads: int, + head_size: int, + rotary_dim: Optional[int], + dtype: torch.dtype, + seed: int, + device: str, + max_position: int = 8192, + base: int = 10000, +) -> None: + torch.random.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.set_default_device(device) + if rotary_dim is None: + rotary_dim = head_size + rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style, { + "type": "linear", + "factor": (1, ) + }) + rope = rope.to(dtype=dtype) + + positions = torch.randint(0, max_position, (batch_size, seq_len)) + query = torch.randn(batch_size, + seq_len, + num_heads * head_size, + dtype=dtype) + key = torch.randn_like(query) + + # NOTE(woosuk): The reference implementation should be executed first + # because the custom kernel is in-place. + ref_query, ref_key = rope._forward(positions, query, key) + out_query, out_key = rope.forward(positions, + query, + key, + offsets=torch.zeros(batch_size * seq_len, + dtype=int, + device=device)) + # Compare the results. + assert torch.allclose(out_query, + ref_query, + atol=get_default_atol(out_query), + rtol=get_default_rtol(out_query)) + assert torch.allclose(out_key, + ref_key, + atol=get_default_atol(out_key), + rtol=get_default_rtol(out_key)) + + +@pytest.mark.parametrize("is_neox_style", IS_NEOX_STYLE) +@pytest.mark.parametrize("batch_size", BATCH_SIZES) +@pytest.mark.parametrize("seq_len", SEQ_LENS) +@pytest.mark.parametrize("num_heads", NUM_HEADS) +@pytest.mark.parametrize("head_size", HEAD_SIZES) +@pytest.mark.parametrize("rotary_dim", ROTARY_DIMS) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +@torch.inference_mode() +def test_batched_rotary_embedding_multi_lora( + is_neox_style: bool, + batch_size: int, + seq_len: int, + num_heads: int, + head_size: int, + rotary_dim: Optional[int], + dtype: torch.dtype, + seed: int, + device: str, + max_position: int = 8192, + base: int = 10000, +) -> None: + torch.random.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.set_default_device(device) + if rotary_dim is None: + rotary_dim = head_size + scaling_factors: List[int] = [1, 2, 4] + rope = get_rope(head_size, rotary_dim, max_position, base, is_neox_style, { + "type": "linear", + "factor": tuple(scaling_factors) + }) + rope = rope.to(dtype=dtype) + + positions = torch.randint(0, max_position, (batch_size, seq_len)) + query = torch.randn(batch_size, + seq_len, + num_heads * head_size, + dtype=dtype) + key = torch.randn_like(query) + + offset_map = torch.tensor( + list( + accumulate([0] + [ + max_position * scaling_factor * 2 + for scaling_factor in scaling_factors[:-1] + ]))) + query_types = torch.randint(0, + len(scaling_factors), (batch_size, seq_len), + device=device) + query_offsets = offset_map[query_types] + + # NOTE(woosuk): The reference implementation should be executed first + # because the custom kernel is in-place. + ref_query, ref_key = rope._forward(positions, query, key, query_offsets) + out_query, out_key = rope.forward(positions, query, key, + query_offsets.flatten()) + # Compare the results. 
+ assert torch.allclose(out_query, + ref_query, + atol=get_default_atol(out_query), + rtol=get_default_rtol(out_query)) + assert torch.allclose(out_key, + ref_key, + atol=get_default_atol(out_key), + rtol=get_default_rtol(out_key)) diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py index 13749570f28a2..db5c7080b50b0 100644 --- a/vllm/model_executor/layers/rotary_embedding.py +++ b/vllm/model_executor/layers/rotary_embedding.py @@ -22,7 +22,7 @@ # limitations under the License. """Rotary Positional Embeddings.""" import math -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple, Union import torch import torch.nn as nn @@ -96,6 +96,7 @@ def _forward( positions: torch.Tensor, query: torch.Tensor, key: torch.Tensor, + offsets: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """PyTorch-native implementation equivalent to forward().""" query = query.view(*query.shape[:-1], -1, self.head_size) @@ -107,7 +108,9 @@ def _forward( query_pass = query[..., self.rotary_dim:] key_pass = key[..., self.rotary_dim:] - cos_sin = self.cos_sin_cache[positions] + self.cos_sin_cache = self.cos_sin_cache.to(positions.get_device()) + cos_sin = self.cos_sin_cache[torch.add(positions, offsets) + if offsets is not None else positions] cos, sin = cos_sin.chunk(2, dim=-1) if self.is_neox_style: # NOTE(woosuk): Here we assume that the positions tensor has the @@ -137,11 +140,19 @@ def forward( positions: torch.Tensor, query: torch.Tensor, key: torch.Tensor, + offsets: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: - # ops.rotary_embedding() is an in-place operation that - # updates the query and key tensors. - ops.rotary_embedding(positions, query, key, self.head_size, - self.cos_sin_cache, self.is_neox_style) + self.cos_sin_cache = self.cos_sin_cache.to(positions.get_device()) + # ops.rotary_embedding()/batched_rotary_embedding() are in-place operations that + # update the query and key tensors. + if offsets is not None: + ops.batched_rotary_embedding(positions, query, key, self.head_size, + self.cos_sin_cache, + self.is_neox_style, self.rotary_dim, + offsets) + else: + ops.rotary_embedding(positions, query, key, self.head_size, + self.cos_sin_cache, self.is_neox_style) return query, key @@ -158,27 +169,32 @@ def __init__( max_position_embeddings: int, base: int, is_neox_style: bool, - scaling_factor: float, + scaling_factors: Union[List[float], float], ) -> None: - self.scaling_factor = scaling_factor + if isinstance(scaling_factors, float): + scaling_factors = [scaling_factors] + self.scaling_factors = scaling_factors super().__init__(head_size, rotary_dim, max_position_embeddings, base, is_neox_style) def _compute_cos_sin_cache(self) -> torch.Tensor: inv_freq = self._compute_inv_freq(self.base) - # NOTE(woosuk): self.max_position_embeddings is the original - # maximum length before applying the rope scaling. - # Thus, the maximum length after applying the rope scaling is - # self.max_position_embeddings * self.scaling_factor. 
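The diff above generalizes LinearScalingRotaryEmbedding from a single scaling factor to a tuple of factors, building one cos/sin cache per factor and concatenating them along the position axis. A rough standalone sketch of the resulting layout (our own illustration; rotary_dim = 128, max_position = 8192 and base = 10000 are example values, and inv_freq uses the standard RoPE inverse-frequency formula):

    import torch

    rotary_dim = 128
    max_position = 8192
    base = 10000

    inv_freq = 1.0 / (base**(
        torch.arange(0, rotary_dim, 2, dtype=torch.float) / rotary_dim))

    caches = []
    for scaling_factor in (1, 2, 4):
        # Each factor contributes max_position * factor rows, with the
        # position index shrunk by the factor (linear interpolation).
        t = torch.arange(max_position * scaling_factor,
                         dtype=torch.float) / scaling_factor
        freqs = torch.einsum("i,j -> ij", t, inv_freq)
        caches.append(torch.cat((freqs.cos(), freqs.sin()), dim=-1))

    packed = torch.cat(caches, dim=0)
    print(packed.shape)  # torch.Size([57344, 128])

The batched kernel then reads row (offset + position) of a table like this, with the per-token offsets supplied by the caller.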
- max_len = self.max_position_embeddings * self.scaling_factor - t = torch.arange(max_len, dtype=torch.float) - t = t / self.scaling_factor - - freqs = torch.einsum("i,j -> ij", t, inv_freq) - cos = freqs.cos() - sin = freqs.sin() - cache = torch.cat((cos, sin), dim=-1) - return cache + cache_list = [] + for scaling_factor in self.scaling_factors: + # NOTE(woosuk): self.max_position_embeddings is the original + # maximum length before applying the rope scaling. + # Thus, the maximum length after applying the rope scaling is + # self.max_position_embeddings * self.scaling_factor. + max_len = self.max_position_embeddings * scaling_factor + t = torch.arange(max_len, dtype=torch.float) + t = t / scaling_factor + + freqs = torch.einsum("i,j -> ij", t, inv_freq) + cos = freqs.cos() + sin = freqs.sin() + cache = torch.cat((cos, sin), dim=-1) + cache_list.append(cache) + return torch.cat(cache_list, dim=0) class DynamicNTKScalingRotaryEmbedding(RotaryEmbedding): From c33afd89f56ba5c260275fdd6723c59642f82f22 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Wed, 13 Mar 2024 13:56:49 -0700 Subject: [PATCH 094/196] Fix lint (#3388) --- vllm/model_executor/layers/rotary_embedding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/rotary_embedding.py b/vllm/model_executor/layers/rotary_embedding.py index db5c7080b50b0..71af9b26e2e93 100644 --- a/vllm/model_executor/layers/rotary_embedding.py +++ b/vllm/model_executor/layers/rotary_embedding.py @@ -143,8 +143,8 @@ def forward( offsets: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: self.cos_sin_cache = self.cos_sin_cache.to(positions.get_device()) - # ops.rotary_embedding()/batched_rotary_embedding() are in-place operations that - # update the query and key tensors. + # ops.rotary_embedding()/batched_rotary_embedding() + # are in-place operations that update the query and key tensors. if offsets is not None: ops.batched_rotary_embedding(positions, query, key, self.head_size, self.cos_sin_cache, From eeab52a4ff02e15f970880a689df2861ad173770 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Wed, 13 Mar 2024 14:18:40 -0700 Subject: [PATCH 095/196] [FIX] Simpler fix for async engine running on ray (#3371) --- vllm/executor/ray_gpu_executor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 261fcfb7dad9b..82a2b456895e8 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -430,8 +430,7 @@ async def execute_model_async( "blocks_to_swap_in": blocks_to_swap_in, "blocks_to_swap_out": blocks_to_swap_out, "blocks_to_copy": blocks_to_copy, - }, - use_ray_compiled_dag=USE_RAY_COMPILED_DAG) + }) # Only the driver worker returns the sampling results. 
output = all_outputs[0] From 81653d968842d2ec51b2642b6b5d83786271f9af Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 13 Mar 2024 17:02:21 -0700 Subject: [PATCH 096/196] [Hotfix] [Debug] test_openai_server.py::test_guided_regex_completion (#3383) --- .buildkite/test-pipeline.yaml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 42a1eacb6de57..6a130f6fadcc3 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -13,7 +13,7 @@ steps: - label: Basic Correctness Test command: pytest -v -s --forked basic_correctness - + - label: Core Test command: pytest -v -s core diff --git a/requirements.txt b/requirements.txt index 05ec2e804e13b..d6c33ad85da58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,5 +12,5 @@ pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 pynvml == 11.5.0 triton >= 2.1.0 -outlines >= 0.0.27 +outlines == 0.0.34 cupy-cuda12x == 12.1.0 # Required for CUDA graphs. CUDA 11.8 users should install cupy-cuda11x instead. From a37415c31b3b5c7ab40d2d897192025f0ca7be08 Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Thu, 14 Mar 2024 14:35:13 +0800 Subject: [PATCH 097/196] allow user to chose which vllm's merics to display in grafana (#3393) --- examples/production_monitoring/grafana.json | 184 ++++++++++---------- 1 file changed, 88 insertions(+), 96 deletions(-) diff --git a/examples/production_monitoring/grafana.json b/examples/production_monitoring/grafana.json index f48b6314eb055..071f134c6e5e0 100644 --- a/examples/production_monitoring/grafana.json +++ b/examples/production_monitoring/grafana.json @@ -1,35 +1,4 @@ { - "__inputs": [ - { - "name": "DS_PROMETHEUS", - "label": "prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__elements": {}, - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "10.2.3" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "1.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" - } - ], "annotations": { "list": [ { @@ -42,6 +11,12 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] @@ -50,14 +25,14 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 29, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "End to end request latency measured in seconds.", "fieldConfig": { @@ -66,7 +41,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -80,7 +54,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -138,11 +111,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, 
"includeNullMetadata": false, "instant": false, @@ -154,11 +127,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -171,11 +144,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -188,11 +161,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -205,10 +178,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(vllm:e2e_request_latency_seconds_sum[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count[$__rate_interval])", + "expr": "rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", @@ -222,7 +195,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Number of tokens processed per second", "fieldConfig": { @@ -231,7 +204,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -245,7 +217,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -302,11 +273,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(vllm:prompt_tokens_total[$__rate_interval])", + "expr": "rate(vllm:prompt_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -318,11 +289,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(vllm:generation_tokens_total[$__rate_interval])", + "expr": "rate(vllm:generation_tokens_total{model_name=\"$model_name\"}[$__rate_interval])", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -339,7 +310,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Inter token latency in seconds.", "fieldConfig": { @@ -348,7 +319,6 @@ "mode": 
"palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -362,7 +332,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -420,11 +389,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -436,11 +405,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -453,11 +422,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -470,11 +439,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -487,10 +456,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "rate(vllm:time_per_output_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count[$__rate_interval])", + "expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Mean", @@ -504,7 +473,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Number of requests in RUNNING, WAITING, and SWAPPED state", "fieldConfig": { @@ -513,7 +482,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -527,7 +495,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -585,11 +552,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_running", + "expr": 
"vllm:num_requests_running{model_name=\"$model_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, @@ -601,11 +568,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_swapped", + "expr": "vllm:num_requests_swapped{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -618,11 +585,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "vllm:num_requests_waiting", + "expr": "vllm:num_requests_waiting{model_name=\"$model_name\"}", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -639,7 +606,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "P50, P90, P95, and P99 TTFT latency in seconds.", "fieldConfig": { @@ -648,7 +615,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -662,7 +628,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -720,11 +685,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -737,11 +702,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -753,11 +718,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -770,11 +735,11 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))", + "expr": "histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket{model_name=\"$model_name\"}[$__rate_interval])))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, @@ -787,10 +752,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": 
"rate(vllm:time_to_first_token_seconds_sum[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count[$__rate_interval])", + "expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", "hide": false, "instant": false, "legendFormat": "Average", @@ -804,7 +769,7 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "description": "Percentage of used cache blocks by vLLM.", "fieldConfig": { @@ -813,7 +778,6 @@ "mode": "palette-classic" }, "custom": { - "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -827,7 +791,6 @@ "tooltip": false, "viz": false }, - "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -885,10 +848,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "vllm:gpu_cache_usage_perc", + "expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}", "instant": false, "legendFormat": "GPU Cache Usage", "range": true, @@ -897,10 +860,10 @@ { "datasource": { "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "uid": "prometheus" }, "editorMode": "code", - "expr": "vllm:cpu_cache_usage_perc", + "expr": "vllm:cpu_cache_usage_perc{model_name=\"$model_name\"}", "hide": false, "instant": false, "legendFormat": "CPU Cache Usage", @@ -913,10 +876,39 @@ } ], "refresh": "", - "schemaVersion": 39, + "schemaVersion": 37, + "style": "dark", "tags": [], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": false, + "text": "vllm", + "value": "vllm" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(model_name)", + "hide": 0, + "includeAll": false, + "label": "model_name", + "multi": false, + "name": "model_name", + "options": [], + "query": { + "query": "label_values(model_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-5m", From 8fe838659164b415d7f3044ec6b7e5bc52c6b6a5 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 14 Mar 2024 01:11:48 -0700 Subject: [PATCH 098/196] [Kernel] change benchmark script so that result can be directly used; tune moe kernel in A100/H100 with tp=2,4,8 (#3389) --- benchmarks/kernels/benchmark_mixtral_moe.py | 30 ++-- .../layers/fused_moe/__init__.py | 6 +- ...792,device_name=NVIDIA_A100-SXM4-80GB.json | 146 +++++++++++++++ ...792,device_name=NVIDIA_H100_80GB_HBM3.json | 146 +++++++++++++++ ...584,device_name=NVIDIA_A100-SXM4-80GB.json | 162 +++++++++++++++-- ...584,device_name=NVIDIA_H100_80GB_HBM3.json | 146 +++++++++++++++ ...168,device_name=NVIDIA_A100-SXM4-80GB.json | 146 +++++++++++++++ ...168,device_name=NVIDIA_H100_80GB_HBM3.json | 166 +++++++++++++++--- .../layers/fused_moe/fused_moe.py | 10 +- 9 files changed, 903 insertions(+), 55 deletions(-) create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json diff --git 
a/benchmarks/kernels/benchmark_mixtral_moe.py b/benchmarks/kernels/benchmark_mixtral_moe.py index 9e08df76947f8..964eca5aaf72b 100644 --- a/benchmarks/kernels/benchmark_mixtral_moe.py +++ b/benchmarks/kernels/benchmark_mixtral_moe.py @@ -2,13 +2,13 @@ import os import sys -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe import fused_moe, get_config_file_name import torch import torch.nn.functional as F import triton +os.environ['CUDA_VISIBLE_DEVICES'] = '0' + def main(): method = fused_moe @@ -64,7 +64,7 @@ def run_grid(bs, method): print(f'{tp_size=} {bs=}') print(f'{config}') # warmup - print(f'warming up') + print('warming up') try: for _ in range(num_warmup_trials): run_timing( @@ -82,7 +82,7 @@ def run_grid(bs, method): continue # trial - print(f'benchmarking') + print('benchmarking') for _ in range(num_trials): kernel_dur_ms = run_timing( num_calls=num_calls, @@ -103,17 +103,25 @@ def run_grid(bs, method): best_config = config best_time_us = kernel_dur_us - print( - f'{kernel_dur_us=:.1f} {model_dur_ms=:.1f} {bs=} {tp_size=} {top_k=} {num_total_experts=} {d_model=} {model_intermediate_size=} {num_layers=}' - ) + print(f'{kernel_dur_us=:.1f} {model_dur_ms=:.1f}' + f' {bs=} {tp_size=} {top_k=} {num_total_experts=} ' + f'{d_model=} {model_intermediate_size=} {num_layers=}') print("best_time_us", best_time_us) print("best_config", best_config) - filename = "/tmp/config.jsonl" + # holds Dict[str, Dict[str, int]] + filename = get_config_file_name(num_total_experts, + model_intermediate_size // tp_size) print(f"writing config to file {filename}") - with open(filename, "a") as f: - f.write(json.dumps({str(bs): best_config}) + "\n") + existing_content = {} + if os.path.exists(filename): + with open(filename, "r") as f: + existing_content = json.load(f) + existing_content[str(bs)] = best_config + with open(filename, "w") as f: + json.dump(existing_content, f, indent=4) + f.write("\n") def run_timing(num_calls: int, bs: int, d_model: int, num_total_experts: int, diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py index 1391d43c8abeb..299ab44f8f3d5 100644 --- a/vllm/model_executor/layers/fused_moe/__init__.py +++ b/vllm/model_executor/layers/fused_moe/__init__.py @@ -1,5 +1,9 @@ -from vllm.model_executor.layers.fused_moe.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe.fused_moe import ( + fused_moe, + get_config_file_name, +) __all__ = [ "fused_moe", + "get_config_file_name", ] diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json new file mode 100644 index 0000000000000..5c8185cfdeec1 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_A100-SXM4-80GB.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + 
"16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json new file mode 100644 index 0000000000000..97c9f4445b166 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + 
"BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json index 1fefb5ff7e42d..edf2a38d12ad3 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_A100-SXM4-80GB.json @@ -1,20 +1,146 @@ { - "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "2": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, - "4": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, - "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, - "16": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 7}, - "24": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "32": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "64": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "96": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, - "128": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, - "192": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 6}, - "256": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4}, - "512": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 32, "num_warps": 8, "num_stages": 4}, - "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 64, "num_warps": 8, 
"num_stages": 4}, - "1536": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4}, - "2048": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, - "3072": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 1, "num_warps": 8, "num_stages": 4}, - "4096": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 16, "num_warps": 8, "num_stages": 4} + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + } } diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json new file mode 100644 index 0000000000000..b2100cebb7f58 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=NVIDIA_H100_80GB_HBM3.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + 
"num_warps": 4, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json new file mode 100644 index 0000000000000..f578c8d0160ac --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_A100-SXM4-80GB.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 16, + 
"BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json index 64d49ca66c1c8..e341a67917d51 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=NVIDIA_H100_80GB_HBM3.json @@ -1,24 +1,146 @@ { - "1": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, - "2": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "4": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "8": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 8, "num_stages": 4}, - "16": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, - "24": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 4, "num_warps": 4, "num_stages": 4}, - "32": {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "80": {"BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "96": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "128": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "192": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 
128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4}, - "200": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, - "208": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 2, "num_warps": 4, "num_stages": 4}, - "216": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4}, - "224": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 32, "num_warps": 4, "num_stages": 4}, - "256": {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 32, "num_warps": 4, "num_stages": 4}, - "512": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, - "1024": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, - "1536": {"BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, - "2048": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, - "3072": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4}, - "4096": {"BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 64, "num_warps": 8, "num_stages": 4} + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + 
"num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + } } diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 3e6dd0dfe2eb3..1ec09f0cd4c28 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -245,6 +245,11 @@ def invoke_fused_moe_kernel(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, ) +def get_config_file_name(E: int, N: int) -> str: + device_name = torch.cuda.get_device_name().replace(" ", "_") + return f"E={E},N={N},device_name={device_name}.json" + + @functools.lru_cache def get_moe_configs(E: int, N: int) -> Optional[Dict[int, Any]]: """ @@ -258,11 +263,10 @@ def get_moe_configs(E: int, N: int) -> Optional[Dict[int, Any]]: # First look up if an optimized configuration is available in the configs # directory - device_name = torch.cuda.get_device_name().replace(" ", "_") + json_file_name = get_config_file_name(E, N) config_file_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), "configs", - f"E={E},N={N},device_name={device_name}.json") + os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name) if os.path.exists(config_file_path): with open(config_file_path) as f: logger.info( From 06ec486794f42db656c3cc16c8c5ed56ce4f696b Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Thu, 14 Mar 2024 18:55:54 +0100 Subject: [PATCH 099/196] Install `flash_attn` in Docker image (#3396) --- Dockerfile | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Dockerfile b/Dockerfile index 18770f994ebd2..8be03b3567f0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,6 +57,22 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1 RUN python3 setup.py build_ext --inplace #################### EXTENSION Build IMAGE #################### +#################### FLASH_ATTENTION Build IMAGE #################### +FROM dev as flash-attn-builder +# max jobs used for build +ARG max_jobs=2 +ENV MAX_JOBS=${max_jobs} +# flash attention version +ARG flash_attn_version=v2.5.6 +ENV FLASH_ATTN_VERSION=${flash_attn_version} + +WORKDIR /usr/src/flash-attention-v2 + +# Download the wheel or build it if a pre-compiled release doesn't exist +RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \ + --no-build-isolation --no-deps --no-cache-dir + +#################### FLASH_ATTENTION Build IMAGE #################### #################### TEST IMAGE #################### # image to run unit testing suite @@ -68,6 +84,9 @@ WORKDIR /vllm-workspace # ADD is used to preserve directory structure ADD . /vllm-workspace/ COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/ +# Install flash attention (from pre-built wheel) +RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \ + pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir # ignore build dependencies installation because we are using pre-complied extensions RUN rm pyproject.toml RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . 
--verbose @@ -88,6 +107,11 @@ WORKDIR /workspace COPY requirements.txt requirements.txt RUN --mount=type=cache,target=/root/.cache/pip \ pip install -r requirements.txt + +# Install flash attention (from pre-built wheel) +RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \ + pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir + #################### RUNTIME BASE IMAGE #################### From c17ca8ef186b5e90a500d3e37724b220944450f7 Mon Sep 17 00:00:00 2001 From: Dan Clark <44146800+declark1@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:11:45 -0700 Subject: [PATCH 100/196] Add args for mTLS support (#3410) Co-authored-by: Daniel Clark --- vllm/entrypoints/api_server.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 86b6c4c67cfa4..5130586e036b2 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -82,6 +82,14 @@ async def stream_results() -> AsyncGenerator[bytes, None]: parser.add_argument("--port", type=int, default=8000) parser.add_argument("--ssl-keyfile", type=str, default=None) parser.add_argument("--ssl-certfile", type=str, default=None) + parser.add_argument("--ssl-ca-certs", + type=str, + default=None, + help="The CA certificates file") + parser.add_argument("--ssl-cert-reqs", + type=int, + default=0, + help="Whether client certificate is required") parser.add_argument( "--root-path", type=str, @@ -100,4 +108,6 @@ async def stream_results() -> AsyncGenerator[bytes, None]: log_level="debug", timeout_keep_alive=TIMEOUT_KEEP_ALIVE, ssl_keyfile=args.ssl_keyfile, - ssl_certfile=args.ssl_certfile) + ssl_certfile=args.ssl_certfile, + ssl_ca_certs=args.ssl_ca_certs, + ssl_cert_reqs=args.ssl_cert_reqs) From dfc77408bdca19308cbb28a54dfe697442fbf335 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 14 Mar 2024 13:16:00 -0700 Subject: [PATCH 101/196] [issue templates] add some issue templates (#3412) --- .github/ISSUE_TEMPLATE/100-documentation.yml | 22 + .github/ISSUE_TEMPLATE/200-installation.yml | 39 + .github/ISSUE_TEMPLATE/300-usage.yml | 37 + .github/ISSUE_TEMPLATE/400-bug report.yml | 81 +++ .../ISSUE_TEMPLATE/500-feature request.yml | 31 + .github/ISSUE_TEMPLATE/600-new model.yml | 33 + .../700-performance discussion.yml | 51 ++ .../ISSUE_TEMPLATE/800-misc discussion.yml | 21 + .github/ISSUE_TEMPLATE/config.yml | 1 + .yapfignore | 1 + collect_env.py | 688 ++++++++++++++++++ 11 files changed, 1005 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/100-documentation.yml create mode 100644 .github/ISSUE_TEMPLATE/200-installation.yml create mode 100644 .github/ISSUE_TEMPLATE/300-usage.yml create mode 100644 .github/ISSUE_TEMPLATE/400-bug report.yml create mode 100644 .github/ISSUE_TEMPLATE/500-feature request.yml create mode 100644 .github/ISSUE_TEMPLATE/600-new model.yml create mode 100644 .github/ISSUE_TEMPLATE/700-performance discussion.yml create mode 100644 .github/ISSUE_TEMPLATE/800-misc discussion.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .yapfignore create mode 100644 collect_env.py diff --git a/.github/ISSUE_TEMPLATE/100-documentation.yml b/.github/ISSUE_TEMPLATE/100-documentation.yml new file mode 100644 index 0000000000000..7ef052a525963 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/100-documentation.yml @@ -0,0 +1,22 @@ +name: 📚 Documentation +description: Report an issue related to https://docs.vllm.ai/ +title: "[Doc]: " +labels: 
["doc"] + +body: +- type: textarea + attributes: + label: 📚 The doc issue + description: > + A clear and concise description of what content in https://docs.vllm.ai/ is an issue. + validations: + required: true +- type: textarea + attributes: + label: Suggest a potential alternative/fix + description: > + Tell us how we could improve the documentation in this regard. +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/ISSUE_TEMPLATE/200-installation.yml b/.github/ISSUE_TEMPLATE/200-installation.yml new file mode 100644 index 0000000000000..4c6c96187cc6c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/200-installation.yml @@ -0,0 +1,39 @@ +name: 🛠️ Installation +description: Report an issue here when you hit errors during installation. +title: "[Installation]: " +labels: ["installation"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: Your current environment + description: | + Please run the following and paste the output below. + ```sh + wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + # For security purposes, please feel free to check the contents of collect_env.py before running it. + python collect_env.py + ``` + value: | + ```text + The output of `python collect_env.py` + ``` + validations: + required: true +- type: textarea + attributes: + label: How you are installing vllm + description: | + Paste the full command you are trying to execute. + value: | + ```sh + pip install -vvv vllm + ``` +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/ISSUE_TEMPLATE/300-usage.yml b/.github/ISSUE_TEMPLATE/300-usage.yml new file mode 100644 index 0000000000000..88227b4b2e7b9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/300-usage.yml @@ -0,0 +1,37 @@ +name: 💻 Usage +description: Raise an issue here if you don't know how to use vllm. +title: "[Usage]: " +labels: ["usage"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: Your current environment + description: | + Please run the following and paste the output below. + ```sh + wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + # For security purposes, please feel free to check the contents of collect_env.py before running it. + python collect_env.py + ``` + value: | + ```text + The output of `python collect_env.py` + ``` + validations: + required: true +- type: textarea + attributes: + label: How would you like to use vllm + description: | + A detailed description of how you want to use vllm. + value: | + I want to run inference of a [specific model](put link here). I don't know how to integrate it with vllm. +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/ISSUE_TEMPLATE/400-bug report.yml b/.github/ISSUE_TEMPLATE/400-bug report.yml new file mode 100644 index 0000000000000..f1124dfa78bbc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/400-bug report.yml @@ -0,0 +1,81 @@ +name: 🐛 Bug report +description: Raise an issue here if you find a bug. 
+title: "[Bug]: " +labels: ["bug"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: Your current environment + description: | + Please run the following and paste the output below. + ```sh + wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + # For security purposes, please feel free to check the contents of collect_env.py before running it. + python collect_env.py + ``` + value: | + ```text + The output of `python collect_env.py` + ``` + validations: + required: true +- type: textarea + attributes: + label: 🐛 Describe the bug + description: | + Please provide a clear and concise description of what the bug is. + + If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc. For example: + + ```python + from vllm import LLM, SamplingParams + + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + + llm = LLM(model="facebook/opt-125m") + + outputs = llm.generate(prompts, sampling_params) + + # Print the outputs. + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + ``` + + If the code is too long (hopefully, it isn't), feel free to put it in a public gist and link it in the issue: https://gist.github.com. + + Please also paste or describe the results you observe instead of the expected results. If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. + placeholder: | + A clear and concise description of what the bug is. + + ```python + # Sample code to reproduce the problem + ``` + + ``` + The error message you got, with the full traceback. + ``` + validations: + required: true +- type: markdown + attributes: + value: > + ⚠️ Please separate bugs of `transformers` implementation or usage from bugs of `vllm`. If you think anything is wrong with the models' output: + + - Try the counterpart of `transformers` first. If the error appears, please go to [their issues](https://github.com/huggingface/transformers/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc). + + - If the error only appears in vllm, please provide the detailed script of how you run `transformers` and `vllm`, also highlight the difference and what you expect. + + Thanks for contributing 🎉! 
diff --git a/.github/ISSUE_TEMPLATE/500-feature request.yml b/.github/ISSUE_TEMPLATE/500-feature request.yml new file mode 100644 index 0000000000000..0dd5a3e5d14de --- /dev/null +++ b/.github/ISSUE_TEMPLATE/500-feature request.yml @@ -0,0 +1,31 @@ +name: 🚀 Feature request +description: Submit a proposal/request for a new vllm feature +title: "[Feature]: " +labels: ["feature"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: 🚀 The feature, motivation and pitch + description: > + A clear and concise description of the feature proposal. Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too. + validations: + required: true +- type: textarea + attributes: + label: Alternatives + description: > + A description of any alternative solutions or features you've considered, if any. +- type: textarea + attributes: + label: Additional context + description: > + Add any other context or screenshots about the feature request. +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/ISSUE_TEMPLATE/600-new model.yml b/.github/ISSUE_TEMPLATE/600-new model.yml new file mode 100644 index 0000000000000..bbddbfd67138a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/600-new model.yml @@ -0,0 +1,33 @@ +name: 🤗 Support request for a new model from huggingface +description: Submit a proposal/request for a new model from huggingface +title: "[New Model]: " +labels: ["new model"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). + + #### We also highly recommend you read https://docs.vllm.ai/en/latest/models/adding_model.html first to understand how to add a new model. +- type: textarea + attributes: + label: The model to consider. + description: > + A huggingface url, pointing to the model, e.g. https://huggingface.co/openai-community/gpt2 . + validations: + required: true +- type: textarea + attributes: + label: The closest model vllm already supports. + description: > + Here is the list of models already supported by vllm: https://github.com/vllm-project/vllm/tree/main/vllm/model_executor/models . Which model is the most similar to the model you want to add support for? +- type: textarea + attributes: + label: What's your difficulty of supporting the model you want? + description: > + For example, any new operators or new architecture? +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! 
diff --git a/.github/ISSUE_TEMPLATE/700-performance discussion.yml b/.github/ISSUE_TEMPLATE/700-performance discussion.yml new file mode 100644 index 0000000000000..9e8e7b4aa3530 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/700-performance discussion.yml @@ -0,0 +1,51 @@ +name: ⚡ Discussion on the performance of vllm +description: Submit a proposal/discussion about the performance of vllm +title: "[Performance]: " +labels: ["performance"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: Proposal to improve performance + description: > + How do you plan to improve vllm's performance? + validations: + required: false +- type: textarea + attributes: + label: Report of performance regression + description: > + Please provide detailed description of performance comparison to confirm the regression. You may want to run the benchmark script at https://github.com/vllm-project/vllm/tree/main/benchmarks . + validations: + required: false +- type: textarea + attributes: + label: Misc discussion on performance + description: > + Anything about the performance. + validations: + required: false +- type: textarea + attributes: + label: Your current environment (if you think it is necessary) + description: | + Please run the following and paste the output below. + ```sh + wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py + # For security purposes, please feel free to check the contents of collect_env.py before running it. + python collect_env.py + ``` + value: | + ```text + The output of `python collect_env.py` + ``` + validations: + required: false +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! diff --git a/.github/ISSUE_TEMPLATE/800-misc discussion.yml b/.github/ISSUE_TEMPLATE/800-misc discussion.yml new file mode 100644 index 0000000000000..ddb10f72db293 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/800-misc discussion.yml @@ -0,0 +1,21 @@ +name: 🎲 Misc/random discussions that do not fit into the above categories. +description: Submit a discussion as you like. Note that developers are heavily overloaded and we mainly rely on community users to answer these issues. +title: "[Misc]: " +labels: ["misc"] + +body: +- type: markdown + attributes: + value: > + #### Before submitting an issue, please make sure the issue hasn't been already addressed by searching through [the existing and past issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue+sort%3Acreated-desc+). +- type: textarea + attributes: + label: Anything you want to discuss about vllm. + description: > + Anything you want to discuss about vllm. + validations: + required: true +- type: markdown + attributes: + value: > + Thanks for contributing 🎉! 
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000000..3ba13e0cec6cb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/.yapfignore b/.yapfignore new file mode 100644 index 0000000000000..2d6dcf8380cac --- /dev/null +++ b/.yapfignore @@ -0,0 +1 @@ +collect_env.py diff --git a/collect_env.py b/collect_env.py new file mode 100644 index 0000000000000..a886db693e2f1 --- /dev/null +++ b/collect_env.py @@ -0,0 +1,688 @@ +# code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py + +# Unlike the rest of the PyTorch this file must be python2 compliant. +# This script outputs relevant system environment info +# Run it with `python collect_env.py` or `python -m torch.utils.collect_env` +import datetime +import locale +import re +import subprocess +import sys +import os +from collections import namedtuple + + +try: + import torch + TORCH_AVAILABLE = True +except (ImportError, NameError, AttributeError, OSError): + TORCH_AVAILABLE = False + +# System Environment Information +SystemEnv = namedtuple('SystemEnv', [ + 'torch_version', + 'is_debug_build', + 'cuda_compiled_version', + 'gcc_version', + 'clang_version', + 'cmake_version', + 'os', + 'libc_version', + 'python_version', + 'python_platform', + 'is_cuda_available', + 'cuda_runtime_version', + 'cuda_module_loading', + 'nvidia_driver_version', + 'nvidia_gpu_models', + 'cudnn_version', + 'pip_version', # 'pip' or 'pip3' + 'pip_packages', + 'conda_packages', + 'hip_compiled_version', + 'hip_runtime_version', + 'miopen_runtime_version', + 'caching_allocator_config', + 'is_xnnpack_available', + 'cpu_info', + 'rocm_version', # vllm specific field + 'neuron_sdk_version', # vllm specific field + 'vllm_version', # vllm specific field + 'vllm_build_flags', # vllm specific field + 'gpu_topo', # vllm specific field +]) + +DEFAULT_CONDA_PATTERNS = { + "torch", + "numpy", + "cudatoolkit", + "soumith", + "mkl", + "magma", + "triton", + "optree", +} + +DEFAULT_PIP_PATTERNS = { + "torch", + "numpy", + "mypy", + "flake8", + "triton", + "optree", + "onnx", +} + + +def run(command): + """Return (return-code, stdout, stderr).""" + shell = True if type(command) is str else False + p = subprocess.Popen(command, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, shell=shell) + raw_output, raw_err = p.communicate() + rc = p.returncode + if get_platform() == 'win32': + enc = 'oem' + else: + enc = locale.getpreferredencoding() + output = raw_output.decode(enc) + err = raw_err.decode(enc) + return rc, output.strip(), err.strip() + + +def run_and_read_all(run_lambda, command): + """Run command using run_lambda; reads and returns entire output if rc is 0.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out + + +def run_and_parse_first_match(run_lambda, command, regex): + """Run command using run_lambda, returns the first regex match if it exists.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + match = re.search(regex, out) + if match is None: + return None + return match.group(1) + +def run_and_return_first_line(run_lambda, command): + """Run command using run_lambda and returns first line if output is not empty.""" + rc, out, _ = run_lambda(command) + if rc != 0: + return None + return out.split('\n')[0] + + +def get_conda_packages(run_lambda, patterns=None): + if patterns is None: + patterns = DEFAULT_CONDA_PATTERNS + conda = os.environ.get('CONDA_EXE', 'conda') + out = 
run_and_read_all(run_lambda, "{} list".format(conda)) + if out is None: + return out + + return "\n".join( + line + for line in out.splitlines() + if not line.startswith("#") + and any(name in line for name in patterns) + ) + +def get_gcc_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + +def get_clang_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') + + +def get_cmake_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') + + +def get_nvidia_driver_version(run_lambda): + if get_platform() == 'darwin': + cmd = 'kextstat | grep -i cuda' + return run_and_parse_first_match(run_lambda, cmd, + r'com[.]nvidia[.]CUDA [(](.*?)[)]') + smi = get_nvidia_smi() + return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ') + + +def get_gpu_info(run_lambda): + if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): + if TORCH_AVAILABLE and torch.cuda.is_available(): + if torch.version.hip is not None: + prop = torch.cuda.get_device_properties(0) + if hasattr(prop, "gcnArchName"): + gcnArch = " ({})".format(prop.gcnArchName) + else: + gcnArch = "NoGCNArchNameOnOldPyTorch" + else: + gcnArch = "" + return torch.cuda.get_device_name(None) + gcnArch + return None + smi = get_nvidia_smi() + uuid_regex = re.compile(r' \(UUID: .+?\)') + rc, out, _ = run_lambda(smi + ' -L') + if rc != 0: + return None + # Anonymize GPUs by removing their UUID + return re.sub(uuid_regex, '', out) + + +def get_running_cuda_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') + + +def get_cudnn_version(run_lambda): + """Return a list of libcudnn.so; it's hard to tell which one is being used.""" + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + cuda_path = os.environ.get('CUDA_PATH', "%CUDA_PATH%") + where_cmd = os.path.join(system_root, 'System32', 'where') + cudnn_cmd = '{} /R "{}\\bin" cudnn*.dll'.format(where_cmd, cuda_path) + elif get_platform() == 'darwin': + # CUDA libraries and drivers can be found in /usr/local/cuda/. See + # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install + # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac + # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. 
+ cudnn_cmd = 'ls /usr/local/cuda/lib/libcudnn*' + else: + cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' + rc, out, _ = run_lambda(cudnn_cmd) + # find will return 1 if there are permission errors or if not found + if len(out) == 0 or (rc != 1 and rc != 0): + l = os.environ.get('CUDNN_LIBRARY') + if l is not None and os.path.isfile(l): + return os.path.realpath(l) + return None + files_set = set() + for fn in out.split('\n'): + fn = os.path.realpath(fn) # eliminate symbolic links + if os.path.isfile(fn): + files_set.add(fn) + if not files_set: + return None + # Alphabetize the result because the order is non-deterministic otherwise + files = sorted(files_set) + if len(files) == 1: + return files[0] + result = '\n'.join(files) + return 'Probably one of the following:\n{}'.format(result) + + +def get_nvidia_smi(): + # Note: nvidia-smi is currently available only on Windows and Linux + smi = 'nvidia-smi' + if get_platform() == 'win32': + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') + legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) + new_path = os.path.join(system_root, 'System32', smi) + smis = [new_path, legacy_path] + for candidate_smi in smis: + if os.path.exists(candidate_smi): + smi = '"{}"'.format(candidate_smi) + break + return smi + + +def get_rocm_version(run_lambda): + """Returns the ROCm version if available, otherwise 'N/A'.""" + return run_and_parse_first_match(run_lambda, 'hipcc --version', r'HIP version: (\S+)') + + +def get_neuron_sdk_version(run_lambda): + # Adapted from your install script + try: + result = run_lambda(["neuron-ls"]) + return result if result[0] == 0 else 'N/A' + except Exception: + return 'N/A' + + +def get_vllm_version(): + try: + import vllm + return vllm.__version__ + except ImportError: + return 'N/A' + + +def summarize_vllm_build_flags(): + # This could be a static method if the flags are constant, or dynamic if you need to check environment variables, etc. 
+ return 'CUDA Archs: {}; ROCm: {}; Neuron: {}'.format( + os.environ.get('TORCH_CUDA_ARCH_LIST', 'Not Set'), + 'Enabled' if os.environ.get('ROCM_HOME') else 'Disabled', + 'Enabled' if os.environ.get('NEURON_CORES') else 'Disabled', + ) + + +def get_gpu_topo(run_lambda): + if get_platform() == 'linux': + return run_and_read_all(run_lambda, 'nvidia-smi topo -m') + return None + + +# example outputs of CPU infos +# * linux +# Architecture: x86_64 +# CPU op-mode(s): 32-bit, 64-bit +# Address sizes: 46 bits physical, 48 bits virtual +# Byte Order: Little Endian +# CPU(s): 128 +# On-line CPU(s) list: 0-127 +# Vendor ID: GenuineIntel +# Model name: Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# CPU family: 6 +# Model: 106 +# Thread(s) per core: 2 +# Core(s) per socket: 32 +# Socket(s): 2 +# Stepping: 6 +# BogoMIPS: 5799.78 +# Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr +# sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl +# xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 +# pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand +# hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp ibrs_enhanced +# fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap +# avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 +# xsaves wbnoinvd ida arat avx512vbmi pku ospke avx512_vbmi2 gfni vaes vpclmulqdq +# avx512_vnni avx512_bitalg tme avx512_vpopcntdq rdpid md_clear flush_l1d arch_capabilities +# Virtualization features: +# Hypervisor vendor: KVM +# Virtualization type: full +# Caches (sum of all): +# L1d: 3 MiB (64 instances) +# L1i: 2 MiB (64 instances) +# L2: 80 MiB (64 instances) +# L3: 108 MiB (2 instances) +# NUMA: +# NUMA node(s): 2 +# NUMA node0 CPU(s): 0-31,64-95 +# NUMA node1 CPU(s): 32-63,96-127 +# Vulnerabilities: +# Itlb multihit: Not affected +# L1tf: Not affected +# Mds: Not affected +# Meltdown: Not affected +# Mmio stale data: Vulnerable: Clear CPU buffers attempted, no microcode; SMT Host state unknown +# Retbleed: Not affected +# Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl and seccomp +# Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +# Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence +# Srbds: Not affected +# Tsx async abort: Not affected +# * win32 +# Architecture=9 +# CurrentClockSpeed=2900 +# DeviceID=CPU0 +# Family=179 +# L2CacheSize=40960 +# L2CacheSpeed= +# Manufacturer=GenuineIntel +# MaxClockSpeed=2900 +# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# ProcessorType=3 +# Revision=27142 +# +# Architecture=9 +# CurrentClockSpeed=2900 +# DeviceID=CPU1 +# Family=179 +# L2CacheSize=40960 +# L2CacheSpeed= +# Manufacturer=GenuineIntel +# MaxClockSpeed=2900 +# Name=Intel(R) Xeon(R) Platinum 8375C CPU @ 2.90GHz +# ProcessorType=3 +# Revision=27142 + +def get_cpu_info(run_lambda): + rc, out, err = 0, '', '' + if get_platform() == 'linux': + rc, out, err = run_lambda('lscpu') + elif get_platform() == 'win32': + rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ + CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE') + elif get_platform() == 'darwin': + rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") + cpu_info = 'None' + if rc == 0: + cpu_info = out + 
else: + cpu_info = err + return cpu_info + + +def get_platform(): + if sys.platform.startswith('linux'): + return 'linux' + elif sys.platform.startswith('win32'): + return 'win32' + elif sys.platform.startswith('cygwin'): + return 'cygwin' + elif sys.platform.startswith('darwin'): + return 'darwin' + else: + return sys.platform + + +def get_mac_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + + +def get_windows_version(run_lambda): + system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') + wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') + findstr_cmd = os.path.join(system_root, 'System32', 'findstr') + return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + + +def get_lsb_version(run_lambda): + return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + + +def check_release_file(run_lambda): + return run_and_parse_first_match(run_lambda, 'cat /etc/*-release', + r'PRETTY_NAME="(.*)"') + + +def get_os(run_lambda): + from platform import machine + platform = get_platform() + + if platform == 'win32' or platform == 'cygwin': + return get_windows_version(run_lambda) + + if platform == 'darwin': + version = get_mac_version(run_lambda) + if version is None: + return None + return 'macOS {} ({})'.format(version, machine()) + + if platform == 'linux': + # Ubuntu/Debian based + desc = get_lsb_version(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + # Try reading /etc/*-release + desc = check_release_file(run_lambda) + if desc is not None: + return '{} ({})'.format(desc, machine()) + + return '{} ({})'.format(platform, machine()) + + # Unknown platform + return platform + + +def get_python_platform(): + import platform + return platform.platform() + + +def get_libc_version(): + import platform + if get_platform() != 'linux': + return 'N/A' + return '-'.join(platform.libc_ver()) + + +def get_pip_packages(run_lambda, patterns=None): + """Return `pip list` output. 
Note: will also find conda-installed pytorch and numpy packages.""" + if patterns is None: + patterns = DEFAULT_PIP_PATTERNS + + # People generally have `pip` as `pip` or `pip3` + # But here it is invoked as `python -mpip` + def run_with_pip(pip): + out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"]) + return "\n".join( + line + for line in out.splitlines() + if any(name in line for name in patterns) + ) + + pip_version = 'pip3' if sys.version[0] == '3' else 'pip' + out = run_with_pip([sys.executable, '-mpip']) + + return pip_version, out + + +def get_cachingallocator_config(): + ca_config = os.environ.get('PYTORCH_CUDA_ALLOC_CONF', '') + return ca_config + + +def get_cuda_module_loading_config(): + if TORCH_AVAILABLE and torch.cuda.is_available(): + torch.cuda.init() + config = os.environ.get('CUDA_MODULE_LOADING', '') + return config + else: + return "N/A" + + +def is_xnnpack_available(): + if TORCH_AVAILABLE: + import torch.backends.xnnpack + return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined] + else: + return "N/A" + +def get_env_info(): + run_lambda = run + pip_version, pip_list_output = get_pip_packages(run_lambda) + + if TORCH_AVAILABLE: + version_str = torch.__version__ + debug_mode_str = str(torch.version.debug) + cuda_available_str = str(torch.cuda.is_available()) + cuda_version_str = torch.version.cuda + if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + else: # HIP version + def get_version_or_na(cfg, prefix): + _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s] + return _lst[0] if _lst else 'N/A' + + cfg = torch._C._show_config().split('\n') + hip_runtime_version = get_version_or_na(cfg, 'HIP Runtime') + miopen_runtime_version = get_version_or_na(cfg, 'MIOpen') + cuda_version_str = 'N/A' + hip_compiled_version = torch.version.hip + else: + version_str = debug_mode_str = cuda_available_str = cuda_version_str = 'N/A' + hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' + + sys_version = sys.version.replace("\n", " ") + + conda_packages = get_conda_packages(run_lambda) + + rocm_version = get_rocm_version(run_lambda) + neuron_sdk_version = get_neuron_sdk_version(run_lambda) + vllm_version = get_vllm_version() + vllm_build_flags = summarize_vllm_build_flags() + gpu_topo = get_gpu_topo(run_lambda) + + return SystemEnv( + torch_version=version_str, + is_debug_build=debug_mode_str, + python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), + python_platform=get_python_platform(), + is_cuda_available=cuda_available_str, + cuda_compiled_version=cuda_version_str, + cuda_runtime_version=get_running_cuda_version(run_lambda), + cuda_module_loading=get_cuda_module_loading_config(), + nvidia_gpu_models=get_gpu_info(run_lambda), + nvidia_driver_version=get_nvidia_driver_version(run_lambda), + cudnn_version=get_cudnn_version(run_lambda), + hip_compiled_version=hip_compiled_version, + hip_runtime_version=hip_runtime_version, + miopen_runtime_version=miopen_runtime_version, + pip_version=pip_version, + pip_packages=pip_list_output, + conda_packages=conda_packages, + os=get_os(run_lambda), + libc_version=get_libc_version(), + gcc_version=get_gcc_version(run_lambda), + clang_version=get_clang_version(run_lambda), + cmake_version=get_cmake_version(run_lambda), + caching_allocator_config=get_cachingallocator_config(), + is_xnnpack_available=is_xnnpack_available(), + 
cpu_info=get_cpu_info(run_lambda), + rocm_version=rocm_version, + neuron_sdk_version=neuron_sdk_version, + vllm_version=vllm_version, + vllm_build_flags=vllm_build_flags, + gpu_topo=gpu_topo, + ) + +env_info_fmt = """ +PyTorch version: {torch_version} +Is debug build: {is_debug_build} +CUDA used to build PyTorch: {cuda_compiled_version} +ROCM used to build PyTorch: {hip_compiled_version} + +OS: {os} +GCC version: {gcc_version} +Clang version: {clang_version} +CMake version: {cmake_version} +Libc version: {libc_version} + +Python version: {python_version} +Python platform: {python_platform} +Is CUDA available: {is_cuda_available} +CUDA runtime version: {cuda_runtime_version} +CUDA_MODULE_LOADING set to: {cuda_module_loading} +GPU models and configuration: {nvidia_gpu_models} +Nvidia driver version: {nvidia_driver_version} +cuDNN version: {cudnn_version} +HIP runtime version: {hip_runtime_version} +MIOpen runtime version: {miopen_runtime_version} +Is XNNPACK available: {is_xnnpack_available} + +CPU: +{cpu_info} + +Versions of relevant libraries: +{pip_packages} +{conda_packages} +""".strip() + +env_info_fmt += """ +ROCM Version: {rocm_version} +Neuron SDK Version: {neuron_sdk_version} +vLLM Version: {vllm_version} +vLLM Build Flags: +{vllm_build_flags} +GPU Topology: +{gpu_topo} +""".strip() + + +def pretty_str(envinfo): + def replace_nones(dct, replacement='Could not collect'): + for key in dct.keys(): + if dct[key] is not None: + continue + dct[key] = replacement + return dct + + def replace_bools(dct, true='Yes', false='No'): + for key in dct.keys(): + if dct[key] is True: + dct[key] = true + elif dct[key] is False: + dct[key] = false + return dct + + def prepend(text, tag='[prepend]'): + lines = text.split('\n') + updated_lines = [tag + line for line in lines] + return '\n'.join(updated_lines) + + def replace_if_empty(text, replacement='No relevant packages'): + if text is not None and len(text) == 0: + return replacement + return text + + def maybe_start_on_next_line(string): + # If `string` is multiline, prepend a \n to it. 
+ if string is not None and len(string.split('\n')) > 1: + return '\n{}\n'.format(string) + return string + + mutable_dict = envinfo._asdict() + + # If nvidia_gpu_models is multiline, start on the next line + mutable_dict['nvidia_gpu_models'] = \ + maybe_start_on_next_line(envinfo.nvidia_gpu_models) + + # If the machine doesn't have CUDA, report some fields as 'No CUDA' + dynamic_cuda_fields = [ + 'cuda_runtime_version', + 'nvidia_gpu_models', + 'nvidia_driver_version', + ] + all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] + all_dynamic_cuda_fields_missing = all( + mutable_dict[field] is None for field in dynamic_cuda_fields) + if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: + for field in all_cuda_fields: + mutable_dict[field] = 'No CUDA' + if envinfo.cuda_compiled_version is None: + mutable_dict['cuda_compiled_version'] = 'None' + + # Replace True with Yes, False with No + mutable_dict = replace_bools(mutable_dict) + + # Replace all None objects with 'Could not collect' + mutable_dict = replace_nones(mutable_dict) + + # If either of these are '', replace with 'No relevant packages' + mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) + mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) + + # Tag conda and pip packages with a prefix + # If they were previously None, they'll show up as ie '[conda] Could not collect' + if mutable_dict['pip_packages']: + mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], + '[{}] '.format(envinfo.pip_version)) + if mutable_dict['conda_packages']: + mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], + '[conda] ') + mutable_dict['cpu_info'] = envinfo.cpu_info + return env_info_fmt.format(**mutable_dict) + + +def get_pretty_env_info(): + return pretty_str(get_env_info()) + + +def main(): + print("Collecting environment information...") + output = get_pretty_env_info() + print(output) + + if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): + minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR + if sys.platform == "linux" and os.path.exists(minidump_dir): + dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] + latest = max(dumps, key=os.path.getctime) + ctime = os.path.getctime(latest) + creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') + msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ + "if this is related to your bug please include it when you file a report ***" + print(msg, file=sys.stderr) + + + +if __name__ == '__main__': + main() From 54be8a0be2819340ce7c2d7993382559597f5665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=BA=8F?= Date: Fri, 15 Mar 2024 04:56:57 +0800 Subject: [PATCH 102/196] Fix assertion failure in Qwen 1.5 with prefix caching enabled (#3373) Co-authored-by: Cade Daniel --- tests/test_config.py | 43 +++++++++++++++++++++++++++++++++++++++++++ vllm/config.py | 14 ++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 tests/test_config.py diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000000000..13a9f76212679 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,43 @@ +from vllm.config import ModelConfig + + +def test_get_sliding_window(): + TEST_SLIDING_WINDOW = 4096 + # Test that the sliding window is correctly computed. 
+ # For Qwen1.5/Qwen2, get_sliding_window() should be None + # when use_sliding_window is False. + qwen2_model_config = ModelConfig( + "Qwen/Qwen1.5-7B", + "Qwen/Qwen1.5-7B", + tokenizer_mode="auto", + trust_remote_code=False, + download_dir=None, + load_format="dummy", + seed=0, + dtype="float16", + revision=None, + ) + + qwen2_model_config.hf_config.use_sliding_window = False + qwen2_model_config.hf_config.sliding_window = TEST_SLIDING_WINDOW + assert qwen2_model_config.get_sliding_window() is None + + qwen2_model_config.hf_config.use_sliding_window = True + assert qwen2_model_config.get_sliding_window() == TEST_SLIDING_WINDOW + + mistral_model_config = ModelConfig( + "mistralai/Mistral-7B-v0.1", + "mistralai/Mistral-7B-v0.1", + tokenizer_mode="auto", + trust_remote_code=False, + download_dir=None, + load_format="dummy", + seed=0, + dtype="float16", + revision=None, + ) + mistral_model_config.hf_config.sliding_window = None + assert mistral_model_config.get_sliding_window() is None + + mistral_model_config.hf_config.sliding_window = TEST_SLIDING_WINDOW + assert mistral_model_config.get_sliding_window() == TEST_SLIDING_WINDOW \ No newline at end of file diff --git a/vllm/config.py b/vllm/config.py index 319c1569f5e98..de687395a0001 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -103,6 +103,7 @@ def __init__( # download model from ModelScope hub, # lazy import so that modelscope is not required for normal use. from modelscope.hub.snapshot_download import snapshot_download # pylint: disable=C + if not os.path.exists(model): model_path = snapshot_download(model_id=model, cache_dir=download_dir, @@ -139,7 +140,7 @@ def _verify_load_format(self) -> None: if (f not in rocm_not_supported_load_format) ] raise ValueError( - f"load format \'{load_format}\' is not supported in ROCm. " + f"load format '{load_format}' is not supported in ROCm. " f"Supported load format are " f"{rocm_supported_load_format}") @@ -232,6 +233,15 @@ def verify_with_parallel_config( f"({pipeline_parallel_size}).") def get_sliding_window(self) -> Optional[int]: + """Get the sliding window size, or None if disabled. + """ + + # Some models, like Qwen2 and Qwen1.5, use `use_sliding_window` in + # addition to sliding window size. We check if that field is present + # and if it's False, return None. + if (hasattr(self.hf_config, "use_sliding_window") + and not self.hf_config.use_sliding_window): + return None return getattr(self.hf_config, "sliding_window", None) def get_vocab_size(self) -> int: @@ -624,7 +634,7 @@ def _get_and_verify_dtype( k for k, v in _STR_DTYPE_TO_TORCH_DTYPE.items() if (k not in _ROCM_NOT_SUPPORTED_DTYPE) ] - raise ValueError(f"dtype \'{dtype}\' is not supported in ROCm. " + raise ValueError(f"dtype '{dtype}' is not supported in ROCm. " f"Supported dtypes are {rocm_supported_dtypes}") # Verify the dtype. 
From b983ba35bd29f6d385efff8bedf80f7989c28d12 Mon Sep 17 00:00:00 2001 From: Enrique Shockwave <33002121+qeternity@users.noreply.github.com> Date: Thu, 14 Mar 2024 23:26:19 +0000 Subject: [PATCH 103/196] fix marlin config repr (#3414) --- vllm/model_executor/layers/quantization/marlin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/marlin.py b/vllm/model_executor/layers/quantization/marlin.py index 0c4f20d9e3a58..48e44445a4a20 100644 --- a/vllm/model_executor/layers/quantization/marlin.py +++ b/vllm/model_executor/layers/quantization/marlin.py @@ -47,7 +47,7 @@ def __init__( self.perm_len = 1024 def __repr__(self) -> str: - return f"MarlinConfig(group_size={self.group_size}" + return f"MarlinConfig(group_size={self.group_size})" @classmethod def get_name(cls) -> str: From 78b6c4845ac9aa57ccf7e42cf4c7d3c4cdef14cf Mon Sep 17 00:00:00 2001 From: akhoroshev Date: Fri, 15 Mar 2024 04:18:07 +0300 Subject: [PATCH 104/196] Dynamically configure shared memory size for moe_align_block_size_kernel (#3376) --- csrc/moe_align_block_size_kernels.cu | 42 +++++++++++++++++++--------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/csrc/moe_align_block_size_kernels.cu b/csrc/moe_align_block_size_kernels.cu index de6a0ec0a972c..138615a4bfba0 100644 --- a/csrc/moe_align_block_size_kernels.cu +++ b/csrc/moe_align_block_size_kernels.cu @@ -7,10 +7,17 @@ #include "cuda_compat.h" #include "dispatch_utils.h" -const static size_t NUM_MAX_EXPERTS = 64; #define CEILDIV(x,y) (((x) + (y) - 1) / (y)) namespace vllm { + +namespace { +__device__ __forceinline__ int32_t index(int32_t total_col, int32_t row, int32_t col) { + // don't worry about overflow because num_experts is relatively small + return row * total_col + col; +} +} + template __global__ void moe_align_block_size_kernel(scalar_t *__restrict__ topk_ids, int32_t *sorted_token_ids, @@ -21,10 +28,14 @@ __global__ void moe_align_block_size_kernel(scalar_t *__restrict__ topk_ids, size_t numel) { const size_t tokens_per_thread = CEILDIV(numel, blockDim.x); const size_t start_idx = threadIdx.x * tokens_per_thread; - __shared__ int32_t tokens_cnts[NUM_MAX_EXPERTS + 1][NUM_MAX_EXPERTS]; - __shared__ int32_t cumsum[NUM_MAX_EXPERTS + 1]; + + extern __shared__ int32_t shared_mem[]; + + int32_t* tokens_cnts = shared_mem; // 2d tensor with shape (num_experts + 1, num_experts) + int32_t* cumsum = shared_mem + (num_experts + 1) * num_experts; // 1d tensor with shape (num_experts + 1) + for (int i = 0; i < num_experts; ++i) { - tokens_cnts[threadIdx.x + 1][i] = 0; + tokens_cnts[index(num_experts, threadIdx.x + 1, i)] = 0; } /** @@ -33,15 +44,15 @@ __global__ void moe_align_block_size_kernel(scalar_t *__restrict__ topk_ids, * to expert expert_index. */ for (int i = start_idx; i < numel && i < start_idx + tokens_per_thread; ++i) { - ++tokens_cnts[threadIdx.x + 1][topk_ids[i]]; + ++tokens_cnts[index(num_experts, threadIdx.x + 1, topk_ids[i])]; } __syncthreads(); // For each expert we accumulate the token counts from the different threads. 
- tokens_cnts[0][threadIdx.x] = 0; + tokens_cnts[index(num_experts, 0, threadIdx.x)] = 0; for (int i = 1; i <= blockDim.x; ++i) { - tokens_cnts[i][threadIdx.x] += tokens_cnts[i-1][threadIdx.x]; + tokens_cnts[index(num_experts, i, threadIdx.x)] += tokens_cnts[index(num_experts, i-1, threadIdx.x)]; } __syncthreads(); @@ -50,7 +61,7 @@ __global__ void moe_align_block_size_kernel(scalar_t *__restrict__ topk_ids, if (threadIdx.x == 0) { cumsum[0] = 0; for (int i = 1; i <= num_experts; ++i) { - cumsum[i] = cumsum[i-1] + CEILDIV(tokens_cnts[blockDim.x][i - 1], block_size) * block_size; + cumsum[i] = cumsum[i-1] + CEILDIV(tokens_cnts[index(num_experts, blockDim.x, i - 1)], block_size) * block_size; } *total_tokens_post_pad = cumsum[num_experts]; } @@ -78,9 +89,9 @@ __global__ void moe_align_block_size_kernel(scalar_t *__restrict__ topk_ids, * stores the indices of the tokens processed by the expert with expert_id within * the current thread's token shard. */ - int32_t rank_post_pad = tokens_cnts[threadIdx.x][expert_id] + cumsum[expert_id]; + int32_t rank_post_pad = tokens_cnts[index(num_experts, threadIdx.x, expert_id)] + cumsum[expert_id]; sorted_token_ids[rank_post_pad] = i; - ++tokens_cnts[threadIdx.x][expert_id]; + ++tokens_cnts[index(num_experts, threadIdx.x, expert_id)]; } } } @@ -93,11 +104,16 @@ void moe_align_block_size( torch::Tensor experts_ids, torch::Tensor num_tokens_post_pad) { const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - assert(num_experts <= NUM_MAX_EXPERTS); VLLM_DISPATCH_INTEGRAL_TYPES( topk_ids.scalar_type(), "moe_align_block_size_kernel", [&] { - vllm::moe_align_block_size_kernel<<<1, num_experts, 0, stream>>>( - topk_ids.data_ptr(), + // calc needed amount of shared mem for `tokens_cnts` and `cumsum` tensors + const int32_t shared_mem = ((num_experts + 1) * num_experts + (num_experts + 1)) * sizeof(int32_t); + + // set dynamic shared mem + auto kernel = vllm::moe_align_block_size_kernel; + AT_CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_mem)); + kernel<<<1, num_experts, shared_mem, stream>>>( + topk_ids.data_ptr(), sorted_token_ids.data_ptr(), experts_ids.data_ptr(), num_tokens_post_pad.data_ptr(), From b522c4476fcdaee254fe40fefb354a4908fccac5 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 14 Mar 2024 21:32:52 -0700 Subject: [PATCH 105/196] [Misc] add HOST_IP env var (#3419) Co-authored-by: Simon Mo --- vllm/utils.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index fe6fd27962cd3..d4a8c962c3bfc 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -21,6 +21,7 @@ from typing import Any, Hashable, Optional from vllm.logger import init_logger +import warnings T = TypeVar("T") logger = init_logger(__name__) @@ -172,16 +173,35 @@ def _async_wrapper(*args, **kwargs) -> asyncio.Future: def get_ip() -> str: + host_ip = os.environ.get("HOST_IP") + if host_ip: + return host_ip + + # IP is not set, try to get it from the network interface + # try ipv4 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) try: s.connect(("8.8.8.8", 80)) # Doesn't need to be reachable return s.getsockname()[0] - except OSError: - # try ipv6 + except Exception: + pass + + # try ipv6 + try: s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) - s.connect(("dns.google", 80)) + # Google's public DNS server, see + # https://developers.google.com/speed/public-dns/docs/using#addresses + s.connect(("2001:4860:4860::8888", 80)) # Doesn't need to be reachable return 
s.getsockname()[0] + except Exception: + pass + + warnings.warn( + "Failed to get the IP address, using 0.0.0.0 by default." + "The value can be set by the environment variable HOST_IP.", + stacklevel=2) + return "0.0.0.0" def get_distributed_init_method(ip: str, port: int) -> str: From 21539e68563ae61d2be311d8b8e656fa039f5a5c Mon Sep 17 00:00:00 2001 From: Dinghow Yang Date: Fri, 15 Mar 2024 14:19:02 +0800 Subject: [PATCH 106/196] Add chat templates for Falcon (#3420) --- examples/template_falcon.jinja | 15 +++++++++++++++ examples/template_falcon_180b.jinja | 17 +++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 examples/template_falcon.jinja create mode 100644 examples/template_falcon_180b.jinja diff --git a/examples/template_falcon.jinja b/examples/template_falcon.jinja new file mode 100644 index 0000000000000..01cf0e2670d0f --- /dev/null +++ b/examples/template_falcon.jinja @@ -0,0 +1,15 @@ +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {{- 'User: ' + message['content'] -}} + {%- elif message['role'] == 'assistant' -%} + {{- 'Assistant: ' + message['content'] -}} + {%- endif -%} + {%- if (loop.last and add_generation_prompt) or not loop.last -%} + {{- '\n' -}} + {%- endif -%} +{%- endfor -%} + + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- 'Assistant:' -}} +{% endif %} \ No newline at end of file diff --git a/examples/template_falcon_180b.jinja b/examples/template_falcon_180b.jinja new file mode 100644 index 0000000000000..f08f7395b7fd7 --- /dev/null +++ b/examples/template_falcon_180b.jinja @@ -0,0 +1,17 @@ +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {{- 'System: ' + message['content'] -}} + {%- elif message['role'] == 'user' -%} + {{- 'User: ' + message['content'] -}} + {%- elif message['role'] == 'assistant' -%} + {{- 'Falcon: ' + message['content'] -}} + {%- endif -%} + {%- if (loop.last and add_generation_prompt) or not loop.last -%} + {{- '\n' -}} + {%- endif -%} +{%- endfor -%} + + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- 'Falcon:' -}} +{% endif %} \ No newline at end of file From 253a98078a21a014c263bea9f99ae9234a263670 Mon Sep 17 00:00:00 2001 From: Dinghow Yang Date: Fri, 15 Mar 2024 14:19:22 +0800 Subject: [PATCH 107/196] Add chat templates for ChatGLM (#3418) --- examples/template_chatglm.jinja | 18 ++++++++++++++++++ examples/template_chatglm2.jinja | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 examples/template_chatglm.jinja create mode 100644 examples/template_chatglm2.jinja diff --git a/examples/template_chatglm.jinja b/examples/template_chatglm.jinja new file mode 100644 index 0000000000000..bf26f27274ef4 --- /dev/null +++ b/examples/template_chatglm.jinja @@ -0,0 +1,18 @@ +{%- set counter = namespace(index=0) -%} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {{- '[Round ' + counter.index|string + ']\n问:' + message['content'] -}} + {%- set counter.index = counter.index + 1 -%} + {%- endif -%} + {%- if message['role'] == 'assistant' -%} + {{- '\n答:' + message['content'] -}} + {%- if (loop.last and add_generation_prompt) or not loop.last -%} + {{- '\n' -}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} + + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- '\n答:' -}} +{%- endif -%} \ No newline at end of file diff --git a/examples/template_chatglm2.jinja b/examples/template_chatglm2.jinja new file mode 100644 index 
0000000000000..c155b7c23f640 --- /dev/null +++ b/examples/template_chatglm2.jinja @@ -0,0 +1,18 @@ +{%- set counter = namespace(index=1) -%} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {{- '[Round ' + counter.index|string + ']\n\n问:' + message['content'] -}} + {%- set counter.index = counter.index + 1 -%} + {%- endif -%} + {%- if message['role'] == 'assistant' -%} + {{- '\n\n答:' + message['content'] -}} + {%- if (loop.last and add_generation_prompt) or not loop.last -%} + {{- '\n\n' -}} + {%- endif -%} + {%- endif -%} +{%- endfor -%} + + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- '\n\n答:' -}} +{%- endif -%} \ No newline at end of file From 429284dc374bab79d4dfbb25053583901e6e5051 Mon Sep 17 00:00:00 2001 From: Junda Chen <32371474+GindaChen@users.noreply.github.com> Date: Thu, 14 Mar 2024 23:25:05 -0700 Subject: [PATCH 108/196] Fix `dist.broadcast` stall without group argument (#3408) --- vllm/model_executor/parallel_utils/communication_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/parallel_utils/communication_op.py b/vllm/model_executor/parallel_utils/communication_op.py index 521b6b8a383b0..6f00fd001d956 100644 --- a/vllm/model_executor/parallel_utils/communication_op.py +++ b/vllm/model_executor/parallel_utils/communication_op.py @@ -177,7 +177,7 @@ def broadcast_tensor_dict( for key, value in metadata_list: if isinstance(value, TensorMetadata): tensor = tensor_dict[key] - torch.distributed.broadcast(tensor, src=src) + torch.distributed.broadcast(tensor, src=src, group=group) else: recv_metadata_list = [None] torch.distributed.broadcast_object_list(recv_metadata_list, From a7c871680e622d998c9a4585404b0b88bad9ba92 Mon Sep 17 00:00:00 2001 From: Yang Fan Date: Sat, 16 Mar 2024 00:36:53 +0800 Subject: [PATCH 109/196] Fix tie_word_embeddings for Qwen2. 
(#3344) --- vllm/model_executor/models/qwen2.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 3e4f843e649b4..12e0feddcb7f1 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -299,7 +299,11 @@ def __init__( self.config = config self.linear_method = linear_method self.model = Qwen2Model(config, linear_method) - self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) + + if not config.tie_word_embeddings: + self.lm_head = ParallelLMHead(config.vocab_size, + config.hidden_size) + self.sampler = Sampler(config.vocab_size) def forward( @@ -318,7 +322,11 @@ def sample( hidden_states: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, + if self.config.tie_word_embeddings: + lm_head_weight = self.model.embed_tokens.weight + else: + lm_head_weight = self.lm_head.weight + next_tokens = self.sampler(lm_head_weight, hidden_states, sampling_metadata) return next_tokens @@ -340,6 +348,8 @@ def load_weights(self, model_name_or_path, cache_dir, load_format, revision): if "rotary_emb.inv_freq" in name: continue + if self.config.tie_word_embeddings and "lm_head.weight" in name: + continue for (param_name, weight_name, shard_id) in stacked_params_mapping: if weight_name not in name: continue From 03d37f24413b13a4e42ee115f89f647c441d1fcd Mon Sep 17 00:00:00 2001 From: Dan Clark <44146800+declark1@users.noreply.github.com> Date: Fri, 15 Mar 2024 09:56:13 -0700 Subject: [PATCH 110/196] [Fix] Add args for mTLS support (#3430) Co-authored-by: declark1 --- vllm/entrypoints/api_server.py | 11 +++++++---- vllm/entrypoints/openai/api_server.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index 5130586e036b2..ba93b1beb2aa4 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -8,6 +8,7 @@ import argparse import json +import ssl from typing import AsyncGenerator from fastapi import FastAPI, Request @@ -86,10 +87,12 @@ async def stream_results() -> AsyncGenerator[bytes, None]: type=str, default=None, help="The CA certificates file") - parser.add_argument("--ssl-cert-reqs", - type=int, - default=0, - help="Whether client certificate is required") + parser.add_argument( + "--ssl-cert-reqs", + type=int, + default=int(ssl.CERT_NONE), + help="Whether client certificate is required (see stdlib ssl module's)" + ) parser.add_argument( "--root-path", type=str, diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 00407bc0e809c..e0626ca4e9da1 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -5,6 +5,7 @@ import os import importlib import inspect +import ssl from prometheus_client import make_asgi_app import fastapi @@ -124,6 +125,16 @@ def parse_args(): type=str, default=None, help="The file path to the SSL cert file") + parser.add_argument("--ssl-ca-certs", + type=str, + default=None, + help="The CA certificates file") + parser.add_argument( + "--ssl-cert-reqs", + type=int, + default=int(ssl.CERT_NONE), + help="Whether client certificate is required (see stdlib ssl module's)" + ) parser.add_argument( "--root-path", type=str, @@ -262,4 +273,6 @@ async def authentication(request: Request, call_next): log_level=args.uvicorn_log_level, 
timeout_keep_alive=TIMEOUT_KEEP_ALIVE, ssl_keyfile=args.ssl_keyfile, - ssl_certfile=args.ssl_certfile) + ssl_certfile=args.ssl_certfile, + ssl_ca_certs=args.ssl_ca_certs, + ssl_cert_reqs=args.ssl_cert_reqs) From 14b8ae02e74aa7223a25cf914b61e0a76e3cad87 Mon Sep 17 00:00:00 2001 From: Tao He Date: Sat, 16 Mar 2024 02:25:43 +0800 Subject: [PATCH 111/196] Fixes the misuse/mixuse of time.time()/time.monotonic() (#3220) Signed-off-by: Tao He Co-authored-by: simon-mo --- vllm/core/scheduler.py | 2 +- vllm/engine/async_llm_engine.py | 3 +-- vllm/engine/llm_engine.py | 4 ++-- vllm/entrypoints/openai/serving_chat.py | 4 ++-- vllm/entrypoints/openai/serving_completion.py | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index 9255f91be55cb..c3f93a2928df5 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -160,7 +160,7 @@ def _schedule(self) -> SchedulerOutputs: blocks_to_copy: Dict[int, List[int]] = {} # Fix the current time. - now = time.monotonic() + now = time.time() # Join waiting sequences if possible. if not self.swapped: diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 0cee604c14d45..8bcd1e0ede6e5 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -604,8 +604,7 @@ async def generate( >>> ... """ # Preprocess the request. - # This should not be used for logging, as it is monotonic time. - arrival_time = time.monotonic() + arrival_time = time.time() try: stream = await self.add_request( diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 4cdad4180aa14..691c9e83d59cc 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -244,7 +244,7 @@ def add_request( raise ValueError(f"Cannot request more than " f"{max_logprobs} logprobs.") if arrival_time is None: - arrival_time = time.monotonic() + arrival_time = time.time() prompt_token_ids = self.encode_request( request_id=request_id, prompt=prompt, @@ -628,7 +628,7 @@ def do_log_stats(self) -> None: def _get_stats(self, scheduler_outputs: Optional[SchedulerOutputs]) -> Stats: """Get Stats to be Logged to Prometheus.""" - now = time.monotonic() + now = time.time() # KV Cache Usage in %. 
num_total_gpu = self.cache_config.num_gpu_blocks diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index d2fb9ca001b15..bfdfe39f210ed 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -103,7 +103,7 @@ async def chat_completion_stream_generator( ) -> Union[ErrorResponse, AsyncGenerator[str, None]]: model_name = request.model - created_time = int(time.monotonic()) + created_time = int(time.time()) chunk_object_type = "chat.completion.chunk" first_iteration = True @@ -244,7 +244,7 @@ async def chat_completion_full_generator( request_id: str) -> Union[ErrorResponse, ChatCompletionResponse]: model_name = request.model - created_time = int(time.monotonic()) + created_time = int(time.time()) final_res: RequestOutput = None async for res in result_generator: diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index b78f053800f3c..bfd7c9b50cf32 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -118,7 +118,7 @@ async def create_completion(self, request: CompletionRequest, model_name = request.model request_id = f"cmpl-{random_uuid()}" - created_time = int(time.monotonic()) + created_time = int(time.time()) # Schedule the request and get the result generator. generators = [] From 604f235937684aa173afec96cca5b02e3b0bc154 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Fri, 15 Mar 2024 14:21:37 -0700 Subject: [PATCH 112/196] [Misc] add error message in non linux platform (#3438) --- setup.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index accf6bb400310..4e2bb2ce851f8 100644 --- a/setup.py +++ b/setup.py @@ -9,12 +9,17 @@ from packaging.version import parse, Version import setuptools +import sys import torch import torch.utils.cpp_extension as torch_cpp_ext from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, ROCM_HOME ROOT_DIR = os.path.dirname(__file__) +# vLLM only supports Linux platform +assert sys.platform.startswith( + "linux"), "vLLM only supports Linux platform (including WSL)." + # If you are developing the C++ backend of vLLM, consider building vLLM with # `python setup.py develop` since it will give you incremental builds. 
# The downside is that this method is deprecated, see @@ -402,11 +407,13 @@ def get_vllm_version() -> str: if neuron_version != MAIN_CUDA_VERSION: neuron_version_str = neuron_version.replace(".", "")[:3] version += f"+neuron{neuron_version_str}" - else: + elif _is_cuda(): cuda_version = str(nvcc_cuda_version) if cuda_version != MAIN_CUDA_VERSION: cuda_version_str = cuda_version.replace(".", "")[:3] version += f"+cu{cuda_version_str}" + else: + raise RuntimeError("Unknown runtime environment.") return version From a7af4538ca92b53537f7869122f89d6a8ea44f7f Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 15 Mar 2024 21:26:00 +0000 Subject: [PATCH 113/196] Fix issue templates (#3436) --- .github/ISSUE_TEMPLATE/100-documentation.yml | 2 +- .github/ISSUE_TEMPLATE/500-feature request.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/100-documentation.yml b/.github/ISSUE_TEMPLATE/100-documentation.yml index 7ef052a525963..501c0aa48b887 100644 --- a/.github/ISSUE_TEMPLATE/100-documentation.yml +++ b/.github/ISSUE_TEMPLATE/100-documentation.yml @@ -1,7 +1,7 @@ name: 📚 Documentation description: Report an issue related to https://docs.vllm.ai/ title: "[Doc]: " -labels: ["doc"] +labels: ["documentation"] body: - type: textarea diff --git a/.github/ISSUE_TEMPLATE/500-feature request.yml b/.github/ISSUE_TEMPLATE/500-feature request.yml index 0dd5a3e5d14de..47a90628c76ce 100644 --- a/.github/ISSUE_TEMPLATE/500-feature request.yml +++ b/.github/ISSUE_TEMPLATE/500-feature request.yml @@ -1,7 +1,7 @@ name: 🚀 Feature request description: Submit a proposal/request for a new vllm feature title: "[Feature]: " -labels: ["feature"] +labels: ["feature request"] body: - type: markdown From 8fa7357f2d3171e3d373be865c8f9520e538c415 Mon Sep 17 00:00:00 2001 From: laneeee <55518470+laneeeee@users.noreply.github.com> Date: Sat, 16 Mar 2024 07:06:09 +0800 Subject: [PATCH 114/196] fix document error for value and v_vec illustration (#3421) --- docs/source/assets/kernel/v_vec.png | Bin 51256 -> 42452 bytes docs/source/assets/kernel/value.png | Bin 121414 -> 171134 bytes docs/source/dev/kernel/paged_attention.rst | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/assets/kernel/v_vec.png b/docs/source/assets/kernel/v_vec.png index bac3c10949f6c55b60bd16e023485a9281dd7e9b..75d344ab933f2db650f6cb361f306790612bbf37 100644 GIT binary patch literal 42452 zcmeFY1yo#HwSEmLlr02{zWBLzUH9vbQepwR-DfA#_35gOgU_Mf70{_`>D01#;h zVEpqjCaBLp1a;*<&i?05P%hfvu9%DdFGpht<%0gDk52fMk(R{**sYL%pG)gD^0$z}Psrc&Gy!hyipo5C|Ougo*jb-=GDceg`l}FiGzT zDq@l8z69TQBNqxu%)@3;s_CY9Iu2(Qe&rsDgG)(8O+(AZ&cXSBOGH#mTtZSx`Jsxc zn!3g#y=VFchDOFFmR8oUZEWrAJ>GbFc|&}B!`_BRL`Fr&Bqe`HNlp9sDLucSu;_Df zNoiSaU427iQ*+CguRXne{R4wT-zFxfeooKK&cRmK);BhPZEf%DB92c^&(1H9zc2sL z3uVuLQVaF|Pn!KjFA|hq=olCv4DcU%p`m;Kp*RT!<~>0yQbk?xOETQ!@iEad@%<0W-6F~@AKt+L zd=MJSnLs3fJaG9rhBFua|F(a%0ny#q|Fy|q{DJ6h{C|T@j+r8uNDuvkyvybnU`1c% zq^KW}K33ffUd&me^K%>3;+`o1@;j6eYH@-55QlzZyDxof^Ye=fAl!%Vg+$mj+Zc)l z=>2;|1N;;)$#wsIqsXWTvbzJU7Vm&p`&ZsccYw`_+)+mT{%E6{g%E_Lf0=Z&56=P{ z(gDv&*Oj4T9-utpx*msK6^EVW5|!IRxsi1Uv}iIn;opwux=0K%z8XxSVCT6r< zHOHBd(Xwmr@zb}6%FQ2lfTdHdQ0Ip(7DhbfgcPj+G{z~k^Kgnj2&iA|RsudnyI{|f z#?BqpJ*Z(w7kthf!Psl)@bB;WKhooW!*@(~6aTdX-nO--7I|&C#=wW?0=5sDn^e1J zr~MH@Lq!iDLVLhX%I0w@KwyBsBz!dPTE#7ZDXf94cnDCIq$2HZ6Lf0pC)_9Yotfw) zY@=K^!YB&JH|S8C^o0Aq)Bw&tOYlpfo01FZgC;S%5dj~3K%kd#aN`9!ZOuRRxWWscTn$m^Ps-4Z;gYT-w(dw7_vEjAx{SV?2G{JDqn( 
[base85-encoded GIT binary patch data for the docs/source/assets/kernel PNG illustrations omitted]
z<)}Y8HB>Wi4ABo*g@aol5ok^!){dNL@q2VEHb^jff1xbZZQFA>mNPcFZMQOxaM@bC z`1-C!V;vMkM&m9}9+Sb`v64GW_mAG~Lzo{_LX=Scfx~kq?;dRY?}U{3uqPmb)-kPsW}9hoRJb z+fq?@=R=@&O*z@+Sn3_l%ldHPE@3;Xj8yEdg za)Om#5}CC!f8z5d1_ZUclL`6GbUDda2M9bq*EEs>i|zezHpZ`4Its zOLoB>t0$jC-oJ_}0Gn1bcL&`6a=a$bATQa~=-RIdq3(l$n@H{?s8~Wy`|4;`oVpKA zg^|c^2HsN|lyWxCEl-=HUbJtD!Idz>8|JU^$C_DCb$|GYp6MVJkav38j7b^eBwS7h zl~vG|kIru!)>L|{YJQ|MP5c8){&gz#_v`2c9fHOE+Fc= z+(3`~aDY=+mWX)O#k?f-vg+37Y%MPGORDQr(HoAT!A4D&gr5a+cm!r%mG?drZYfkP zSAwEduO~~CwD=QaVxDK_c%3*LLQB44u{3%gxw_f~H+o3rMASAbo9{SH*TfwK;)Ss} z7Io#KKmWluf+Wc?e2C@@h5Lr2$>=5ReJ)yF)QW}HwKD$I)_T(9t}KC~=n_}QLh1QP zBgKm-_uM7vO-|8=AY^He0bFgMpx%X*sMUwTX^(E~+mE8LNd3U8*x&AG3-n?U7k4Lo z(JHMUn?KLh*saoRWx0G>3ah{I#9pTe%$WN1i@Uh(+xt}@&prESn6k}V1s?GT_z*Fv z@y0r043Y^1@d%v@Vpp@0`b=kFvb6g+h9IF=zqnbs6<)g-Z-=xdc26auAaC zFl;@_o7%n;}xs5kReR`|c89_&iT{IO70FOb$y*bA+?9@s5t#pgu7CsTW>Ww=q zx0Oi{#&5O;_{}vgl^;Rf$h=ahyIW?OmmO+N2g#?dph{44Iv?zgAkVxTI(-eRSuK63Z)7 zcA%j|w%xkdat#eRU6Hl0yU@2$ZwU530Qgk>lYj;iL669QW?=SL^Y+h}atLcTP7Qxz zG+H|ks>x3GbCc^uxNegrMq4kv?^J4dw#_f>lo*#bA1|X9jVbG%wJiZ5QofVQODxGz zZBg$V_at(|-?={s(<_9laePYx$KK$oTf@9*H^u~SqRgNtGw)Apcfjsz)IJ0 zn;)@KO0MJ@_ClI8W3URvh}Bm4%DvyL^y;I(SB%XB}N>z?z@B-+p(o zixN}QHkG7aH|!|`ZC4e473yTxinD5uXxs1~*uF;&!Bu@4#*OMQtedQ1Lje)9T75l` zj_Q6{O(}b_eSgU_t z@3GRm^HohjmQ_oB|2fEFjbXN@q)_wZ4JvwF5tt-l@`apU*lF`t+bsLyCCY+mV(9lmPS}-uLyrKUWKvi(i z=z3X*Y*vcu3ccsqjxW3A`nZ6rU|JA-(PxVyGdnsl?q{sdA&qfMfGDxvL0^v3k!{}L7oOrO z%Lr2g$`yTYO~+8*`=a5}DP!rx^=40@54l0?HIUA-MH3FDI5 zHON_8x*AY_v0T3oP2H{hYP-XG+|LrI$99%t({u@0bF zzg(Do=dBK~t&Hhf5}vk#o#7=Zi`4+_6j_Ek)3}u6$?Ao}u-4Tq*5tkUX5iH2x7P}- zm#Yj7U)4hFbJRvncmEIW-a4wPEs7Tg1f>xr1VxY#kWwV15u{rh1w%4>!s5wFq=?}L$( zgB2SgZt5m*EvI!bIoLRM%xwe}zDnCU=BD^G%og>?>{@p7yJt!3?4odaDL9sOdR22s z<#f($JA^*#jLBpuHQ+2w;h`fJJ2KsHxkk`PJPC3Mdt4Y$555Ol3Zk71bj?INdD!7f8&EE+P2wB|+MdYX@wJL~ z+npy>73!fizuA9+NbsusQ&3ha;hkA~3b`LYvR(O@h$QoCQ#KGvUyGxc%ylDoCX5Zm zB0Q){4tDF~Dg}`y=iQ}VT#J@4SsuC@But82%2q&Wfp;0H?HEdbxY$c?6HIIf1Ij z|8S{n%78z5^{rsZYVE+4Zhe(4$b5JS#A+>`xo+N*Qox}H%9T0?;jGuJ5^I+P0Rs_) zC*5wSpa`2POGHJUyJ@!91Nmy#v#K|b2@=q{=v#nz$RswOgybe49M3jLzpP8+>9=XE zZ}A6_5q`Y}S#GXCZa#p1auITWDJpqc?nO}drh+`yu_OaBsb*);aVQ`eB4BC3{hD_e zBClM;Ey4knL?_E>g;KlTDY)i%OSf95`5Wz{=Ec0Uk$|5rJUXWXBqmXX;hATly)wu) zvvd;xdB4NhuT*o=j{s8Q_k!+AFX7r?CGdc?Qo^|ZgC~_9?S;a#1Yh>!(6kDLykFuW zcaus7Td3ORUufZf;jmY&|Lbs=s*+a^SwecebCwf~fVL9z-UU5;D{*HKTEyIZak5{? 
z>I^i^*&8q{x0QmHPM;v?rKbQ}nsqv+YA_)9V*bx_pQgX|oqa2Ov~O1oI-w|_n(%P# zAwnNP;As)GzxV{$!whkverG48#*Q}e8R$eMcm&JCd3{-*0GL_jzWAcTCDxg?5224k z;;z)&^gSen-#|B4PwoW4AnLk+BzyQ6wGj+;21NhO321*{rLoX$&F2dqG?)&CpxC^F zRAPd~=ug?OZbk*(xh@9T$;T$YvcFbZj&S~WPCyoI2L|pyfuQs`XbL+5FTM-SIZ1fo zTn+hJx+vfsXc*IQZz?OBEDH0b)t-7)9auNNDDqv#-WK}|`+ZBmQ9`gkree=I-5(4Y za3uql@S<|lHc00dbebSP-oL&)GRXSwN1cO03RYw3Cn>0_9^zwlO+ZK*6SMWE2; zo9=srnlksAf-afBk~ISv7NUnm@zM-*J7yoUMJj{oq$mlVG!P!cPAD?DbIZs;9%w7=l zrh}Eo8ozeCfuk;_K-Scg16fguPD|rbtS|Q8S;;Bx;Cyk4i15UvT{Y8&W5{c zr>1#B5tm}uBGKald)x=+^w|3W;05vH@`-VK$!!D^0dQB_mw<|&>k9jUvnTC_5ipZ^ zK_+1Ogb1uTh~p9DkUt)VSEC3<`-v}|#2R_{ibcamps3^qM}`OI(7G98&O#RfamoYj zzT2Tj^fg0o$e;)AK-#`uI>$XMf$DgW{>Ha1=2`ZXM0}KSd_k}MbA$c3e~*8R!2Pmr z+h3^O{Qdyddg^(sGxj7y~&s{#q@==bfYO`8si+8u$utv)hATXM) zi_mwdWGXpTJl-4HkfQ~rprNelN<34>p7YCB-DlUE&mPmz5M01?0ih(MU9Se4ly-!F zJZ_X_%}?iop!o>1Vtjcdq|{EI5cJMg01nJWuo|+E01UA{`=nGXis=MA1bCapA?t#p z^Om_k)4}8vrk?XW140#l8ILVw!*dr_(Kuvjv_n-*TV!#litIEiwIv+w&t@6-e&EPw z;rue6X+PGo(NY_~=H}ZcCD{5&cAAvs7;sahMHacz5&!V)z|Z(KT0aq{ap|C*0Or@d zSw@NDDQYG5Av$@Irppj>N$y5Lul#d_`(KK`*=Tw1D0}))PJzC@_b{k4F0UncFjLW~ zHHWjWm78-TR$06|Cdh^L2ZCAbAYuBw=EaNZ(uIyIx9IazhvL^Gb& zf$8?;W3DX>7Aud}FGK^+bhX}C^jgvzaymsUapkrVn#wn+l7*1+cM|T~$21~eTqgf4 zCA!wT{-j%fj15t`y(Y)SLEm@14cz&b;Wn@IXJ`#W-fhqy-)a%OvY77pI*2-LJZI;m z$>csK_jK|*pnhJTyh{WT<+RVh6QJunhjoSfF$xH$cPxfVIEg5^GAxGj2%o6W0Ix5b z)~eRce4}%9tyM3dbJ6KX19^3mwuF=V#oD$!o1%sEALiCzm!haJk{{`-neE0N8=!5x zc`BXV1>x z7nz0+Bj1Gc$SltK2;2EvoUL`Y8(qK9|J8M>XDv|MiuR(@k`>_HASWVoVOEK7kI+$D> zYKx{r*+oATYN1* zB0C7vf1HraN~S%{bi1}>>-A!zy5mG+cwo|#AL*FFb96qs!z?n+FW4%{mI~E zrxurue$B>~g_e)R$lU++%6Yc?r$F;h%BCz5Al>rsx0Z0wr`^vdU)GRX_=CQStb+?O zAGCvRv<5h*Dd-^o-y#M_v$FMjPcQXnYihB)f8~0%lPBkjIqR~Z3 zg=gIQ{i1BFEl-ues=N*tMrp5j=J^OI35cO~_q|00xnm=Fi4d$Zo5kmB~eu>&W_ zm4s{b>!h4$`Fab#hTdM(P*vi0I7cal);i#Qk3j`C()6(I$@VBOtI0DO{UCiCrxRWU zzeZX~(>CfkLw7p;-a|H=ih($-8{G;2?N9#Vdo}) zhnL5##=r!bu7NipRZ9m0TNfD#bPCG79}}b;AcCwjezt+KPn9m6zX&!sk#V=*8h0-gk$N z5ybhkjfuk0uM73TYH*@eWR22~39tGDOkZnZTtwA!d?GyLL~lPQGQo)uE*uz=MqTtsM7(tunxr3Vg?q9>mUkkiPyH?-~O5Aesri^{_`bFxm%)mnGZp+IH?~OLQjEoIGYm8i0PP&_qa`Y z49orTeXyIE)5bX39jtAGEVDHT5iVVKUY~2Ldk6HPG!R!uZLN(WVQJgWY{Ele{=5e}$fKy;wm*WgRpS!8i8f@b z8ha@O9CMuZe>VDK>Tjq*8rU{4@-+rDssWiVrjmZvr@LA;IK}{p_v1J<_C;C^Y4Mq> zjwti=Xc{uWP1b+}Gk~wj_Q4b9Y#T`Gtn?sAvG{l&5d#PpL(=pQZ>|NoL=`dbw19Td z-5VIJZC(OG9FCUS#Og=uzYa2w=`2 zgH^?_R9uCa6ZMuf6M~g4Tue8hBs8W48imW;Zv;=RXou>wS~s)@H9Ip=z0yFKktznK zGtWh%yTI=;a0dIR3_chW?ef>#)vmiD_o>U+B`fC5k%#!fnM7!t%sV$MoKw|cfDNCW zg%37Grq&NnQD*Eg#ipRln3@?&n`?1f3;nH2wMU0hRBG9!&u7lIVr~Ni4coU2;d22| zr_8#cZ^{}rjD8e|Uk)Omg)#U>V432&$s53hTlW!q4j_yH$NpVdQ((UpS@quUG^Z`^ zDRgAXP-7?nrRq6?I#*s$gKAD79={bSO= zy$sa&Q9y%aW2F?6%q}@S{Y=ybBlRq2$!=AR*v<<06kIjdrqZo@K68z~y!mpBXPrm& zN{h;gMuhLNlnCQ#k_jjG!Vy9Us>*X*g&6sz9jmLI0Qn){pPaQ|N-=6&4v6Jgoz$_z z5HUL265oTNtJeqb+C^~>Xwh9rMD!1&-^JUpCcrf!`H7PfS0M6&%Ren?WOdDcPWhA1 zI~tr*9w*P3c?SIiWLJuM^lo>gzMIo^c$U?zKFN;kRY=5(WOm0xMZe%t;IUWSo8cEa zI^3$QH@8G&Z^t=U<8XB`mV8!ULRWk>inv}%@}dgmgl!}UMZ~(X18K?MxpXDXcQM$+ zyp>+rVTbvO&O<6(k5bD#hIh7w-pI^){Gt2ah}k_YqDx-^7%WDR+6w`5Z#0__RJDHN zic1!oLbY3Uhp_I^v7r2siKexvoPY|RVwgI(j z63rbZGy&G@@%K+3^X(ueANOPWVV^=yMnxYacfkn+&4$L6z$Iz(ICb&d8o;!t&8RV; z*A*7(;?H0gl|353_~1mg393rf_HHR^s*k&{>#KjVs>HADs_Q(TN~+PS^GNG{)^{WG zOL*5@m+Tca46YRKE~kh$NuQad#9&~!cgh)bV0m}$39$Ot)r+tgg=G;AEb$c3ifcKt z7}-}Z0ddplO~ms03uw1cm&$`MDK=BhcSH|Nsl7F0ktNhamxSHNL^Ct61M%l+QTPI> zEyAV9ow=jxMe#8k7_~%8xGO3QxsNVrO>!@&-o0^TmQIDKvE(<^x0e!STqZJpV!FIr z^1LzU?J-0`{2}@-c~;i0tK~KFp$+=1wA{n(t=c2kCf!-sPqK$~PDkorl$&tNWF?uv 
z+MFoNd%sM0a)k$>i74O<&0i`M{&a9uo+N7GPsBf$*mr|HPhj^uxj)CUR8jO#z|yO< zY6glbO)f-Ez=}`dyU3nHDjDY8ag6-qn8m%m&2Q;-+Cgj}gr)-C2^iTuCwWI7iQm&y zC!%*$vqPvdh_d}8{G)g7F__kFRub;I#e@&~tI+^@qM^cZ`oc{)5ECqGkuvwV~p z`C@E=xxi8d^TkO5+*WpQv$$AgPOk1#_n>Up!~o^w>6crEx-w;^;FZjE z13D7r`)rqkw<6oY{(hQU!60t--WlcdlhN8H)jZ1TA20jA80T|}EnR5hzNgeG?&i4I zCpheB09nHXtr&RYxK)0wdgeU4jZxl`_&9t#+c{kb7(X6y`WK~dl}9{fr$oAo0Nd=H zWx5%rqfq$=9obt4GC-&>>;g9+o&w7OiAior4ayaz1MfH#F)=^*a3&WxqGWGeF7J(b z5vk|Qd;>D}$xj!2bG+`7C`~_w=X}C)nApq4HGNm0q4rydoY+kr!xb)oY9+Bmu)_jH z?_g8pomdNW^k|`Jzb$^E1)5otw5EF3RXYqj#C||6cL5ES%1#AIocw-|1-x{9_PEwg zf-~7T6pncYBwJb8d9v+v?bq|cV95snz>i6hFZP%0L$Z<-b>g>?%%H*-4)-0o^`o(a zS|x$3hqiSY#VfLSvwKuemkJ3oU9PvV8tecKw5j29ez8FLqLNx}^)1K2iRv2?fsWnp zvYfVgH(q>Ds%@&SPqkZZW^C+Hc6Q_qX$t95(w~KKV|lM!*S;HpxUIxQziMqAy^ju> z%woxABd^h^P5qg6rkYo7v%8{38& zW16O`Ow5~~mq5|gl$^GOJF!nwbYi%nFR7!)A-XwP_d#>u8R^%G-ZIsBmyNbBe_7~S zt9mEqtw)(BFFQf~U=d68TWV1G15beZfag_HG@?G6e6+T5@L4{U*_FrdkbA(aW=e$q-h!?n_G-Y$zX)2T$QPd;dP__?1vY=b`XgTHid*4 zF&Rd#T%why9|A*j)%Fiw<7JX3MB_f^@Zwsv@!cPw81_8DLBN9;O3lUB?aVynj0^8? zFpktE%UJ9EqSLEwq_fxq)?Z(fG$ORiMCOD52gG>I#7PvfurxKNrO*@l9Y+^kfeNlSw~i7P@0xs!<_WTQ%wF^wnf<4VqjJzH`2 zu%GmmA8~>}>q^UPj%CPnYgHLukVeX6%k9OC4YfxQEwACs9l6onP4>k1Pqy4(JapZN zk(tG#D57Vd0$zI;Eh~^fkbzJ6Y_B7sceaM)a=-rT(=`+^tW5hiXCXrQ%1AL39)2RK z&HONKAigVsf=%K0w9jYCOnwFP{)CwFqS+8t21(aA>mpib`XPD#kvI!g{~9r8ZHG9*#3T! zarmLIw(k|;qxGWiO8y7|MAl@?E8>YZK=tu9PxnjdwfNcWwh9P~%t^}@hxe^H4niRg zp%R`N{rQ{ead9C(Ls-5d>}OIS1kKpgOecDV(`s-uZzSwfVXS`3y;{UBKhUWw_rVXXFrCrX}VAvyrq>*Gu8*aEon5->X|k`gl(K?zvH+ms`fcAVcNJ z>l&TYT87da< z$+QEgs-=v3rKECSM%tgzX5h~zJWOihkDNe)9(s}cmwUf}+pifL1{IxA2AxBkO?>is zd)Z5h-h=y-5M(o6wg#F}Od4$=ZENK`hY+*Tw*C=)&2|U%E6ejW&U~JJ&cXCl*!F>< zwj%-?nupd(sAlHjt$8U&)R)qy49(Qldd34Jlbl8UgGy~#-(s# zExKEq=;as6-r2{sL9vUL$vk>SO4qqR;0)Wh9@hDuDQ4Y?`LT$lZ1^NUa7tQ4r~~Hd z08Q65G4F{=ivmXDvS^B!8lGk#6{mMdONt z^ir*$wLu!)=P$@~dn0Z?hOJH_`WrX1)~ULv&fT@Q{liysG&Tkg`)jJI0a-bzb3^e; z@x%GV{Swlgp24ZO<^Z+0gMLRU0>;?wz<|xsh!2UobAqkRoxr2zI87Btk+{N` z{)I`1tz+g*Q_0;37IJ+To05B3D?6%+MJ+*tpfciMuDh97h}pMQd9=4OH{kq3!EH=v zLse*b0Z5H;w3@Xm7hDSm$zSP4w_nv9*lUN<>ILSNfib?Ob zf>B74EGQ+$i%GthO&9(OHoW+aVOJ7_F?Pw^n_c=IS}f)KEcHB%b44d1S0~np65+GP zNhL;RJvb$lYBPXahk&Z2)1@Z)GvLL`0}Mmc1|fm`&LKX#&v`Bnk#wKo;|rPCq3Z=F zR1MvSVgMj((Xw0kw%lgPV)6zyp(8^M{B0nQh@oJ2s>h}K?8AQTyZ*s~<_xehE$2)d z@C{F874EIjKGe%ogf$P@R)AnV={u=Fxm}(=slk1>vC1KYg+ZF+=UPa?Vqwq7JOyw3 zF`9e1>~%HkKpFSFUAVY-j>V0VrA^X}zwmihp|92FednnpJ>wdPdi=)mYKrxz+Ai3& zO+K9#KB}iaycePCIDGY8>GP|Izia~)*cLoB4Sb(^Zz(sUh?S ziMghW^C@kMM1Uf#3!=Lhat-R!xBEEcE~njT=ud7t?BmmX$YR}>>Vt9{Nu3X9&SdQ8 z3>hx1TkK3|(P9f&VMzCk1+cp`DP?>scU{fx{rRFAAHs`*6-jy4QqTK7{X|53uz!Yq zy#uW>zc0Q(L6&cCB)!1I$lT&^pxZiK@m!K`G?;!LyL)rHdX=v=`cr8|gZXsD^iL_=%xQT?6doRqx%~$hTs#rN97X! 
zl6&#GV|-~Lt#6^-yLNB~(tH^vgY37|F*J1@!h;#axJ?yY{o^7u*`!s!!<}k8xazvM znai|QU{LQj=8;m@B8t9DHu^bQ;JiS4=g_?4TkerS@DA_NxwJE`WfUxNh4p9&cYg-V zY`O4Q3q&PqRtxZ0&sU#(RalLFYb*De)ly$G*gnp=rqrPLHYOZ9s;cn=i;@cLn(0Ks z3n68crkSr7tdD9&h|v-CwWhN%=e6T1wjx%kIgMTwjsWl4Z|v~edZ3dn;?(g+(v4Cy z(a_89WC6Vl@|RTFARdCce9=;KEVl}i_`&Qp%iK~8CRzGc_Dh;|g}R1J?@}{gouNRa zR7PDtP^Wm(S7X!W z@<_*5g6_vZnZlV>pEJv4YXh%aE$$9T>gcFYOB86hPxi@ItG+I@n_y{SxgEVUb4Wv^`T+PN1y!vmb3Q1}UsN%9eTpTwqw#hs=}Ld_{rd)@eJaN&wOjDr1TpU}V>ArB$xB(Om1@<@E5BC!)uM%TpD9|M?uPn(i zY(8#bPUm7f$}>{yMCPM>sQt5ezw}-3>KpUaKnfuOu|ZR44^un_xi0DTPa$}Vad4?vX(yD zcm(}AymiF;4U9l9DE0<(dk(w-m{YRCtfa@mUFQX@mF3U6sQzou)1u6X%?`}7trg_D zOU@o-zO(+>$!{Wa8-nSZ7$|3TLl&|THtc{e@c;9WzznOwX;p^|yKO0XDpG*}SsD@@ z;ENLL5oJ#JTnJB$vNf{rEd2M3XvJU4hb|&VmUwgv-omm-dUjnwD-Rb=nEjrK?e`Ls zD+gaVjChB@_R${_{@q$NDe)lR3`VPC_=bMCRXBaawlBCLB~8Tiso?{1F5ZW$09Q|`rF)|DkA@k0kLQdfR=lD=NM?7 zp<$4oegg#*5JU6xkO4o>Yke0>NB%4^{P>9c+%1|Hkz$Nk>jv3GW6hzA)x>m?_N!eN z@X#@LnSMY%w;Ga$m&xUy!tZ2?8U8lsiacCZxOQ2K$5v>`DQ&BHHEQ1CSx#tE%w zqMy-LMmc%>MI|YodhuU^%VX0#6XnG{-8A=EFS!GoM%fb%rzK%dHb%#wU{ehyKE4n5 zm)R9jnq+1oCE8~n(8y?lu00bd_fwz?Qu(a+RuRsDoT$qm1Cz6=a4_RK1~$nhI2>D| z#Z3E|sAQ{}(Ji!JCyK^p3c3bnb-XUykI42OTxkG&=xP@xwh?~GMK-6W4O$T|M_?L(g^Q#Q&ZU_1=8QY8+-hTDGF{S!~whi=0E=qJ^KJ| zZkR{##a|cr^@-suO^`|vVdiazk{*OWmDOj2^7(xf32mZRuR}u!Jcu>dtkWvEH_<$- z&(H*QE4=4^w!m-Jm6*ZpvQ3z%n9BUSM~Kn!?$4dN*DH49-6<>R#QfYD=mBfycrZq&mb zy+$Clx_}fNGm8}0O+o{8+@na zafhVws|gYYPdRcLvz?BP(A-O#eyFKO#gV&I!f!O)zgezwPZNqT$2sZRF04q+On`mZVpp~B(F6Yo1pzVFo%kIAhytwpm;!Iv*{ zY<)BAh?iA`q>-UEjkTZug&tFu<;#(1=`+bOkt3E{7On@8FRENnN+UhDb@Zl{f-IP~@cTp3L8hfaX$G10y=NB#6 zzwe1w@6u-<_2@~h%edyx&WG*_IeEf!S5tjPOmTQeDDxeLc{11j%lEytMhljD;gK5N zZn-jh4VRV19sBvRM`;&g2WnW8bDUGcgBc$V{PZ!?k9Q^sE`P4Y*49cuS+{A-_%uqO+eK1^~eStOW&l&S; z3XAq*VH>F(kc#fXvi6TT3NIxL3f(?!H5kLchxPF%k1QCc8M`dcHtGJ=S3>9s-phSi@>~LhmKjuM?(evp5(uqOpXKdB&j1Q z>)&^&rVAa@ua90r{qH8TBWr;~oP_$n@AChS2jhW@;{tTX6@XD4#l7)uz=OcvOGi=l z_uY7S(23F*epzQ=J6=%DXnz1+T?G+pC0N`sw3Grjvyxdgtpb6qoAm^}{_Ig@9(c&c zi^L1Z>undA$2G&x6_DA=&szc`zt&~oz&MON`Rg$&Tf_0wE7TS*kB4Z`hs!Y!rt$!4 ztm*;RUCeU8GX|`_qW}5$MJr)=g;l#}P$7SU_zU+w(;MP6O?}sXHV7>yFR%PHhQjW` z(`y!ZwQ&6IBVqgj1nf!(iU$y)9@NbGli)=Ad1TN#<|F5l81M6-2Ldlx+g_u1)R)nTgn%*vL(@X zwN0WJTJB``kAh9+Gyq1G75fl2RhACuw9cPH+tp0QK~w+W8>7hU_u`DLVc^M*LI^|g{-i#XyUI|6 zSUv$lA){~kZZ3cUBrg|@LBKi+nxnI9(s_gTw95R}ok6)dJymm?YL$Q3tS_f#2(a2Q zaKyjY?rXiln=a84>JYLBbpJ18Am**ww{rg?@C;eY|B5HV^u7RkH8^az->L7EHKSCJ zIS=lqHd)#hejQpyl(kf?38Mr1 zuWjqsA|3(_IzL-={FLxqh;~`P+p3+0%H;cjz@kWV%2lk|xS6A)QC;sM{N}8{bZ+&k zJCu!J8#D)dJ|9AxmC9M3rR2d6z^&{nK#Z=qsin<^E|%TgGB#w+4_%JzCkGB}G3!q& zB?lcalCXrQCLdcn_LxNGI28G-H&V%;fM>c?BYSeL~ zGcx>>gfFo8od#{{sxp>f16`Fj1O#h#;j-05o4L2PB#AO7>B2M_qn#Da;c4eLe?;XT zn^9RcT|Yyo7oI>3t#D&sYxVRW8_a`03Kvpr z5mrgn0cr!*k!mAm^1r_XP_&89Uc|M{Z+f4%eidkk80&Vu_)5exs!>nqRm<8xRZIR`}I6%i?n)0&fmQ65lr=X^ct#B$@SG1LeH4u-dvG5|Rcf7ljtlo}l2jQM)Z#uQ;?cpvakw&v#UO zw)1MxTUzkCDcrt6B)ol3EzAF1S1tx#vk_xwR;0c_a+(U7hSWhzzW&j2A;vjbjkLID z2o1&NBHs9r&3-gZx82)pp~x*Bwjcvzts}3SjCMt$-jZP%7jdr)*+J=1fdkygx0|vCn{2t9@zJ(|3MfDnQ(1R(^H*uXho=tKMReHDS*#TUNaaiNk)$ zIQpI{*nsgp|IKQUGky(y_vmS`hacP_4W;h=UIP0I{%Y7t@%g~VRg;SgLcLBI(EfoZ z{Wx`2KHHKD6k$H+K^`#i?yQ`WAq3T_@qwLMXm8 z7oO2vsU_6TDe+zCwV(~^&LY|Qd!(000$tE07CVWv3z8vaGaB)a#QvLIIMH$Ii-(9e zQ9=nRag%bCqpbzwi>f&Ajrv9vzs)NUufV5#RjRo~jn8qSh3Eu^4rX5u*uxH1mFc)G z=Let0653X3>JZVKO;P}A)zQ(6l z_;D34>$e^1(m#9#Sqv_b5xtVeMr(EIqn4Kg(oM3Cv`#%Tj1t;y%@`LZuhm&a`<%I) z+iC+WzrhkkiHg7#%WGYa_{A&qv$yI+C5v2y2Aq-O&e|_pPFper1+$7&K7)!~))5?R zLBVbM_B3B1D^of``+b24lASc0Kh%nRzvS zSP4@`9$8YhyH*djAE>-5(p2y@A~1+uWndmIN;737-FueDnLJP1Y@}GW`C{W`+DpW2 
zy*uN1NrUhPWl%PTfX3r7#b_>Or&q#49cP=AYcGeEGFsm9UdVU!J9mxN`ej-(y;S2j z2i(C@Y}dNINgd{P3<~G=A-75F1fx5?&hj+LOfoHhPZoFtVAI3$lR*$%9-0YD&RLt0 zi+Yo{_U2PsW+oW0WRVC!1 zdPnh&@H#!u%g!4GB&VAlQNF8j>}VBbGE7pE;yPa>(&9b%Ff zWvp=wNj<9(m$+a-GI`Z0?$JvRe6~%qT4zgrEIfNsLbeLC5z-4PJ!+)O*_>d;Ma8z7 z`NfM&M-75*zpF!uatSjES^0VL_ZtUvF6fx4jD#bU(Crju##cJ_DvhQ4{FgMx9OmW1 z4L!*FcS?E{eX~O^y#4{%KB-PGJZGKi4bKIGkGTE;LAe2MzXU!%zd3raH9<{H_nnIE zoHK+xpN-8gY3QmjJg5!NMDs_PqGK!Ysf_q@&a2V9kl zByMcPJ|}XNcr(`=do=$|7oDQN0mG8B=j%5$s`u6;B)vBMFE^vruijMkYE!q2F$>o4 z_rsB*Fu$geTltngw4m03PxmQR$pU>KxP);F=jlCk?5Cy@-%vlg^L|~Ym***g8Kyrf zlP0kR=fx`sQj2}ep28C6c%SgE-3)w^!1lx=M#)52TPeKyaV7-@bfd7XrV9(X=K9Gszx9&WrXi+pU-y zH)Y0Km~&mQ%EP!8>M=|vYXE$Z4{TJXpmtqXQB$lwq6_M_HfZ_#j@v^F)BgFx+OiRu z2n-?~Mt#PgJkhiiP0Cm$0nhzs3J$n)gzt)~`rbs@RrSlnDe8oCane=RUc7UfzNN3u zJ(`U8qWD&Sz@xaAkBC+MS-Xt2!nqVK-_7V<$e>ZQJqY|UcqmRJ#FaM;?BTCpEp-Xy z_-jzvq4Oo7+Wtdb9BZyJ(S*Sds!R22S6?#6ZibdARVXBraHU?Ncy(LAKAb|1vxp{m zx8wPVO_%I)%CTUR6#jYcRARx15LFj7DSz9-(mQpI7wo%kB68HCkJ8D)T&s0G5MB|_ z?lf??5l;}c%2sovm#i*3&``%*G2F|JYp%t6s%YuPbTtW0#kQR=+fu;#RHm{%&Y3?p z9VhfYxZe;s!s;Rbi#awhhCfmBp6m2+sG}m9E^>Hld?%CA8k~IW4{zmMQzd`!v$jRR z{mcrPwh{4GGW9_d0cdf$boR`NI+S`J9eR#}MVU+YH514tiv`{^z+TsYSoGoa5Yh1| zvoSBiqv3@Do3rH7z6;Cl?Y6*hwmH=F&a*fb{%1DlADIueTlb(03L-|ywp<1s(DZ(i z+W+d7eBVWJcGmN+4Y?C0m(@>*8@f<0*p}0nX`-{P0mZI7tTrcV-x<3jf4Iwb!{m*@ z$?`5SywccORSIs2#(a^U)k)AJEX+WK#E7-fLLnsa`BgUac$>m-R{6B}N588(5+6sA zlE2nd!hduu8{W3uYstk2?;y5g`=0FQ7A&&`9LuqGu3q`dO%^$m+OE_)06yDknnK|| z`@D-hSNjzXCEmw~jsPTtqhxdexHR1a=Xr80r3|&i zx#J`~Vl_hZo+KqIMTO{oOzY2Nh`yK{91Af1)2L{b<|;^)8I|{%{e3w{5^YtgFfbl( zo-dXBTQqV!NJ5~0uM979QT`RB9DnjyA%S#9+<|27-y?G5*BL30+>><_@c+-b1R(oj zZ(6Hj`R~hLVTR;d$e3dCzt1(N2nn9C;fH(diLK+ z?Ek$#7L7#m<*S4#yT^aY#Sw%Wh1O7Hd8NF%8a@hk3!`8tFsy_A?XR$p$qV}H zw*86l%4e^0?2Cu8hy!T6MV!-xH!Z+^o^|4J7FbQ1s#n8Ut)>hE#dA48WGlDs5ur73e^;Q!az{;wsKk}g!0P!oO-MOVApOZNyJ-3TWX}?6xTeGz@GU`y zj!;d>_17eaBB>h*@$-vMHHY_7@|QzZP>W2<-<6ztV|}*;WLPvC<+7#R*IzG)A@lwU zn=;)cWlr=XthAXUrkF#yY})}hRD48Fz+`Pjim^hjFHu0s4B%=-y_}0r%$<4{MmsiB z0X=dBgv(Mt9@?9#RC9EPDM_hlm!s1%kJYtja4>dLX>YI9LcjV@v1OD|;@wQqVOg)9 z-SSds$)V~|I>T;?9z*AZjLvw@pi0zUO7Hg*PDhFhPhUyFh!EYtJQqAEVk0wswkw}m z_E1jIzSaHrBoGb9T-SZ|{igbY4FYjG!QlPYP5W>X!$q20QP$(vVOO7FalMylwoX2^ zk7VQ3|*AQ1YXmd8gs6Je9ca?{`=j#x8o(z@?wP`q(C6MUfdK01F`_ znK6t{^2IZ(Yg|URJ@9|UMMw)fF$}K0pnl=JNqY(d8($tK>WN}~xrr=rhmdesZ|w=I z$x}NHm5@b-DOB`a+m~v!C}-~~_-#5e-(;h|e!}3Pd~qrG8qVY2YmdiWjOhDG&rT+D z+7<>=pktmyrWtxq@K_)I-j``|PwNHlh2J2R@mXb>7c4PhxtbY3OZeT`|1MS}$ck`U z5ZC9I?(;wAau*KVX`J%Rxxal=a#K|EMep+j*4^Qg zcYSW5dGx@`kjZKgzs<+o!Rn4d4AH2^aAOJXugmk(C4N_{qP!4U^4*VD3Xt7ZWNB8? 
zS4+C=IIo`Pr1yFyE%?{zyCmUqR-aNKLSQOI-b;@=KV)C<@bef3@lPJZ=L*jlgP^+{k+}5ZFWL=cltO!w5vR z2O)oVx-Fm+4!q_)^jiS?RKfPE0Z|WHBdPeqT=259Mhulzcb5FnFedQK3YExq_s3s6 z1$dPJIq~3%DlZ=xx-wWKrFdiuaUMCr^kjl%54#dgFcP%Sk>Y`Ih_0P}ezgjg5ApB3j zB6>$b#vSb9(}+zIq$)a*X)jSxXxLZ<3+VJ{%o02!(Jh4RFP$wz#W7|DEJ1aW3d9fKcHldZ5mrEjh*aUo(w~sMw?(YRtHGK*<|HN5x4K;pISpgd z(5eFnqaDBuHb`JTCG>WJ(n!X=#*^nC1Z|RxKsHwbx`0hAm;h#BEHVoRQ>U#@d-&OI z5tWLgc^r}sfv98*>@#`{84$m1urnLIxTft5`keu!wta`B?$Bn*o#_K8AsD-#U>~$I#QKx9)CDA<4Spc*A;Aq8nUXms` z_CPEbT6Ba(Vss9w0LYQ@K?Z3I9s=sNp7HM3cQHOXl9xY@1!o?}0jwlVBU)oj2zJ(qDE;%6*V3w$ICrBM2d4kzc;wP>?Q(KylsV zbqJek4C2KVP~I(?N6tC5rw{ByEhvyzSy;o)&%9~^x`^zc&Jn>4p~_wt8z?BPp@iM* z8v72Ae_e0y6!9s zNM<2mDvQo7WFm;KESF1!nP&rpx$|zjy$PRvofnFK02MTO1mrGLt`1TO{=i$d7~{DJ zH@%45bU7^}+%&7l?US5{Sx=;(%0Q6%K~XP(3|@LJ1icRG{XRdgcib=>D-$GIXHd3F zGztEdT!zAAJ#$UchOZ?>n4(N6ade+WrM(STn(l!E18?*x+=zy1u0c< zDCZvslPIc(t>9?^9)BXZya$iWG_MSn|h=Q15*3YN3w`{ZbX)f*Ly*X}Okzq+eJ zSqdUMVYiOJmL}F0j4*?*W4Kj`wx7EAwN~zFXUU!Srm;}F+s61JXn;|Gy`6g>sTKb% zmneDwR4nhGBqV*eFY17G-DDY3jrluYEYpHfAe-JB1nbWicNI1?7cz8VN;`aSl#vq} zDBL()bZ5)mF(>C1S)d)XSr@(#kzM2?jhM9SR`P#_Kr}9U4ykmy`=H14wo(Gll{@*3 z1#AoFA+8Bi5}jZ>sH^98XGn@B5{}4!&S#PGJ>SjVHt-1Y2+!rr`smi~nVcwEu?cFFU?ZrBgo zJBjk*Y(7(zB2X);GvA@xp86X z3~^6bXi%*ALf~68TbgPRmsLE@av<#;fwHg~cp5Kksw2~qp6Iz8Gppg@FUUM>>U83g zcH=Gxx9)1XyUPsbzZt?(d<-ARpq})rqR;ZFW&9bZcV}p@m)d{3cT^VQhyjt={$Ccd%yahe{ z3h=X)iUDCnecm?*lg zeh++CirZJ|cbx09?y7|gzr@2wv!w-r@4)$}i z=s&qT_Ry4u&WZKuewR2=f9YU8`h6uEUo?yZnnV@!=B?;&h6lUUCmDJbnd)6hw1hw| zjv9z|9Mn!$jK)gs7q0bI=@m?t3ha1L`~1UO&D9#0L9hl&OXh|m)B45{2`*y2+;xT- zhdi7dQM}&`vDE5{xc%PErDV!C6EpCq#nyn2ZxWG|6FlC;XV23p!*i&Yb!4DUhR%L` z5;bXSh%i`PROnI0<<6rvJM-g6L}mwnFw2E!Lfl_M)1~g=2XR1o8t(;n^MU9ZGZcBj zBeSd-z7a=PkJ>38mL&7@flya$OpVBuVm-T}@m6&F+wPOmlt~-`&T9jwEB98aIS$3L z&|WjR2KTv`3JgHH#UlFQK9oG}L)?bFIjptOaj6<-%8@#`8ZLq5afC+l@q&ET*{B@z z>+fZ*NF4z|QQ7ZV4eNsIwF~HJwJLFfHS0Zc4yvv8x84uVyLOLPFKN_%X1Q^vPsX-* zpftO~B1W=PJ7!5EnUzaq7w9%wmI-Op3-3o(y~=l|V&t2*ZnoxkQn|8w)rw}ZwMDY+o@W(&BaZk+H6La=*L$oYHD9P^ghiWx8q*Ei>|E@qnb!7zw5H* zJM=OpX)jvR{JRoT`K*5J0pv|qago_nNizftAkx~OW(s%=uIPj08c)T0Vxnr>?u?1J zXO?d}<$4fbUUuJ^j+F8(+R0A4<>7-Aw;+`DTzgr62_>*pAg(2Eub)hsbFr!&*JvnU zlKkGtz*1SLhaW(u6?tr6fqSnwi`%v1xybGpWER_C0NcoPY$9{~@%r@DsQrHasV%Oj zfw#NpzKcaR+df^ZnB%uZC6+c@m$Hi&hno1~N(aby)lhnnU3Ps4cE-s3%f|1r#5 zb6@v$U)On_pXIFE_OQm;tn0-GVR2SVQ}D_7o{#VkxVy8>K$^JF<{vvsJB{eNd}E@sAX~J`P1dC0ROsy zGiTp+#={_9)XMo-Yah|Y-oJ4V z(Dp`ESKGF2N?qOH3*n`o;8-+>wDH ziKiU6bVA--A?InEUOU*I-X4E!n)%U36H-0lh7G2dEY>d{RpCF|9mpb>VfED4Bx{AM zPG&oZ6||0wmuIj0lp)>@(>&p(ZptD{B{9z9q6|i0&{1P*IMtpT&?s&Fz3tdF_fvL9 zlv(hk6Q>rk)Bn}DM$Gs7gJ<^g#cjS{8Q^+^?q~eaL$7EH2+cT@!y&f438LP-*DH9@ z8r6n$R!S=W?N#)-IREh5!llGizCB&4zBAA{sUvb9+C%4yOp&9{8)tA9s9(9R0EbXJ znvP!h>Cv@?=loTtrh?3SP_emg;D+$Q!7|1(*`qsp`CBD0>MpfC_6>Ht+g|vhnmOa) zg{KQ2`H!gKFej~$-k=6DQAIJHg+g4I`52>T^!|S0m|-@QJ}fu% zE@0ni`P7?PFc>JXDfC7nc~vWG{OhQ~7FE?9>H<1_Kyi^pEN;HXM*3$P-M_#>diczg zjj<~krEdJNbI5r`MU%za#yF5fI2Ok&XMG?)xkkFg$N^Uz=Klg)n&)q9%WJl3rS0Hs zTzOS+M>*#-sPwq!Q)TLa68J_Myi;SiPOv>VAWQ+(V<^{{2C0&Z@%j#iQbF6>07nWP=i|H zu0}~wrEV=0Izj4u&QMU$g+_>Z=MDHuiGD+#a zQPX}~zS^)zXffM^9SFTP&r>{;X0f(1CYPsom6;Jj{$xTWYZQ&! 
zy4u7<7(}|hofj!J_Se^s zUm3b>S8T%Y)@w;yaM&X0Se&}G)$J-nZfuO{Qc9Ylp3Xqu3zMpe$Fp{?*o^beh`7I8 zyfd3&&~yw+T9JB5Xy0IX^4X(ml~fZLBC}c7S8vZhXmnW>WiNp<&mKurmqpNv5Bs=w zb?zw~ySFyp6kct9>m#8^+%B-%-@%(%{djMqjv2${+K&Bl_U=>X*~GP2;)D`aeLls$ z#3dE$Y* zh&L_KWY;goC^vott9G_F?1RMQN$jw8_uSsg;{t5aE-)|PFB}O4$ zRFxm+R*R(K*qc7pm7Ac1KO6z(Y&wOE4Q<7!)Tg)NY^;}MO-s(O!#Vtgff{qARl6>< zIwC_7X-_nmxUrm?W`;Ye{R0HW66G}jcdE@FI`dIVZ~4YkhErrOfmYWnwuYRc>`Sb4egL zrP34p!>%MxP$Oro`+*6!uc~Ac>1{r!ddQB`=W$rVRdSm)M;MBpaJr^gP5rbq1Sy{x zb~^K?%&c}J5W0|Y(&vEJp|8Yoq(^y^H-8jY9wW|hj|Eq6OC?=l(?XTdVz(3)A$PZf>9OCC{G}hwdyue;l_5o8dp8E&RJVNhAZM_ zV|%%G@)s=HRPddeg&IS2i>R~|OWI?zNm$A zVm8x^b=qjsO>bK%?7$>+ zug}vq9Ha{B7S}QxUut>ePneVuq}XbbY?XUxfxuHO2Dq_LxnYS zsH)}iS+d9@*&1s%OppbB$DRc`92@UMZo>riBC$1>qQkroe@ZVE+r_gY4QyhyuEMib z@^;20!gutBjpLckE(xp~>E7}Q6iNilG>M)reS}T=So_KCCZ_2}p6{~Ni#o;So3pRI zwqav5WiBx>(Uj=QX@vOnWEO*4B-;!q@A@b3dEWPj(esI{Vef-dKpmCp0b-FV-t zu6|s(_C=L>t;RyyK?c*rqu47crkz}QyPF%zb9H&kBi$v*vIM@7V8r&KYmzxT;)Udj z_BgWE?-e>aCloY7F3L2<(e5VfQ_}e)7^z5qj;cf|(uN33K%ASdZrax{S*@7z;U|=! zgU|%VXSi)ZN-_)Do(|m2&^-WhFiDgmpS20IE?)rDf^Xpy6jjQN;YUL^%#Z5{N~%9r z4xatVRPc2I$st%oK4#}@?GATx>K|5f3*^*R*SK@KSp^(f@GH^6C%KrL1?`-#X(bygY%7mK^lMdu+xq8|hfHVf)2 zaIhzEq&JCWKq@03T$M-9wYTs#7=eHc`Vp6ka1idxL)m{2cCjwk05B{1h<$+0kCr)x4e91y^DEmQ@; zJ2Md6MpywI?nG`v#ReZ) zLq-+%ocXl^{*ON=Ny$H)>s2Rn|38Y#pU@hDq&1|dnEehr?U^)$FSOp&_V@2ll@fm# z%;fyt`!G_y+{BMR)=a;Y_C{5#XHSrVHWq|RMZbD^UPW7Yj z#qAZWw%GRtC6x{?nfN*nD$<>y8=oN}mKAJKULLD9cgxzfGuA@ADRFO;8n z(lHQtn?uRm9G=qypVm#*dWo(5DY$=XCO1?|M{`9=Zce#FoPH!OT1UK!ut-W_4d z15F=^wFW#LcLBBOwQ7qnlwT7!aG^~5;>hjZ-Ij}(PjR%bx+StA4h9N+AWijLF7mOt zQvq^Oyqpk!<#su(rm1D+PW0OnJiE32zR3pO?iHp~z~EAlQ6gn^Kv zeSE=3`0LWk7N12Qjj~Nh-VnAu7fM3(e-#!WPf)uHES>V4<}``l86*T)lDdP{spDJs zdh3A$$93P}oM}m605bpMZ%Qrz_v)9-W1+Xjf6`mfi1K*4Jec|S;dRjPtPp0sj6;9_ ztX@P~S$;aD_P38>(*M7HDa6|@Z?|pRV~TVog3ZkO+keAd>D~pLTzPxY z_ss#N))CA%12@D5JO|VcB8~gfw6m_-Zym?igltaU?j5o+BT?EIl9<_Fe zFB<1lC_`+(Bu`3zGN*R~X;cN!tk093IV1m)wll6HWwAkLWG6(0OI?PfgM$IB2(g*F zNnd=gnb$|L!uE?;?V`$Az_Xp8WwJ#V6*0o4gn@6gP#f^IM>!7=PkjV!^cYu%fAH=gRp@E?&5TOu4_i2LPPi^C_{opyk4N~Jt|&yGuao1w z`;Noq{XA5xUD_UdxL{m{07xg2$^d|jnY(GVs-#-@1z_j+=5|I&VrV`6R@{_S!(fUx zyg#~mG(@Vs)jpUmR@8m1BOu?82inS8DUWkL%!{~5YsS?rW!t;MoR2yIy&*at!Gde-N{M_AZF_cc@8x?%3jwgolK~Wb$jE3gadf!nf=_ocItnbSQ zyXAz2>gVFzzXp7-2JP@8yKh`uLEC@Od7A_rp+xjfFdqzLvFP_w!gX|Xhhh3Bl6t1Al0&zNx;s!|zsB*b4lc^$!nOGQzSUgwB< zCGp>%eV!FGj$I+K<5!hUdk_U$No-5Y0uU8)y-s_#+whM9njHck%^nb_v#q`_`TnC2 z=UvU(J0+=wv3=mpsIS*P29@Sek|8fc$V{$6$G8Trbsn~gN~{65FrX)UGkMoA|V zfM`t0ZO+3TOgu;G{WzI#eTocU9j2Au^dP~$)!+yKNC+`&INXdLQjF|V+b;d0Qa7Ch zE2#|=UPwkE{xUO%9zl+koSf;Y*KU9L8*32I*b*T_{k7yUCWX5#cQS0gX4qKNtefY`K zo^WQZ5Z36cR^G8h2r$+t}hf@3~;M~&aR%TpAtI}Xm2jnoN%;F*+E_GRl z8-1-e7Sg{`Pl!TOC&MT6*`;Oq(w@u-(z>pjcXBBZHWXK%wX_1x{BN>g(s??ope!9e zy_Fq1VRKJ^|-6i)Ipr0Q6ni8mB%%HcbO zzN(JS!dP@DOkvdvoIx7`*759mc0JKR&GI2-9*9fdzZXhmhFspY^vUPYGmDjc6TXH# z(Sct+Ag?&hFZt8Hscz1;HRl1aAzIQyB?}@h0+V~QaI=pm^KsuXc0~jpH{yFPw7N6+ zGj$~3go7%;J8Yna+FVnbu9qr@FE|pC^cM_IR4?W@$Yb%n|I0=XIby^8p}(qN%em2D zIH3}PPWNW$vg2kM-CAWQE`i20`Gt#B5ydz0^$4~5zy$Z65PM*0d|Q2rJF4RbCS&zs zb@Cpnmew^W8o8|%bK#_LE#}CH^2%(}@pBfnJ4%{QS8TF?6d#5F;RXv#It{lTquVMT zy=XVITeDcnIld=%%UNf^+wQAR=`XjR$csslBm0Fn4Jz8_>7Ijg19f@>+7>{QmG)3w zwvmQM|CW38G+x$_K311w{3nYUa`$yf2!ldc+4iQ;l<OuX{Rnxkz`SW)YO_KAI97oFTZYGdNW84}Wvml+-B)O1Q$@$;$1YZUTZ zjpe^+zf#2lrqTlahn}0ee58hwoiu!Wg6e9i;5?e34JMK6V!)?dV&>BHIV80|WR1hNHnkU&ei5I4ui~K2(Bw*PSLc zeI<6v&g>W@fR11NX-04MNAvq#p0QH7jDDW0aK$G2VXG`K|wVVv(#W4phJ-4wd4(f_h zDb#vQ$5^Fl!JlGua6>YClfYMCZ5IX(VS+2_$1DySb00UIxC!9_gbOVZu9#@aK`0;% 
z0@Wox`h?3N-(Y<%>8&Ij&m4($XL8x3C<`Ut$J{uRaO>$=Ldp~4$9hKI5-+}K&m0P@ zf|GBwNRE2@wrOq_rBTFjQkYlqz%@N`qmR^)cVXAk$d5D+g~mcF*)goXcDA;K-nrUQ zY|P`=6~`mc=o2lmmGf%m-Ltn~w6tRUkvFT zoW7vg?1~3!)f~{vS_v<~B<1}q;G+nZ*Ia~2m1OUBb!iG4{q+&Eh&W0M-Gmetca|y* zX?XY_hJY|MRtkHqG+pWkqfLc6{Z*A;UqlfZj-Iyyxjomp5g-1znz(cdoY79YlNm>9 zsN>>;5c9>`)@WJrrp*!~`RaeM%li=VRD5|PuyeO!z@I-oLTKTU5w%;x{4*u-kns1V zYkdCg4RGuuHJ*#Ir&xde@768)pJD!+CymNCSZ~Bcv)Jhd=7Za=sAyfvxoGb9zW{U@ B0L%aY diff --git a/docs/source/assets/kernel/value.png b/docs/source/assets/kernel/value.png index f585c77b2e1449825a3c704cce6b102f567696a8..56b0b9e0f56df00ed15aec029cc0ee1dcb82b780 100644 GIT binary patch literal 171134 zcmeFZbx>T-w=YV*A-Dtx9w2z|V1q*f!CeL!EVvCi3@%Am4*>j?2e}1B6{e}IvvVW`nKV-tf z#>9E{0s|fQ`QOI=Bk@1pFVWC2o})ec=f$(*+ zOz(?jylkg()U!@kf*{U~)`c$f)670{^pCXJg&s1Ol>5tHbaSJT{9drOZMXL55AyA9 zj}e9Z(J2sy8Y1kJArp1N2b_Ji*0a<_X|KbyW!90E*3#uhe{Vp;{;O$p)v`GL*DHP7 zl2(WgpLPaMHI9!xgSJEf$vV7-94hg;W3hJM3dzHB)_bsZNYDlJl zc?woq{le9HlSXybBb%|y(hbHvmJ=?Pu{tB*_?sUZ#c3-O=X5`4NQ(g}O4ohLE^@aA zD_-09{_1jRUa;MPYcblkE9;rF(~MMny8#BPQvNNcD^X($QbGN|Lo432n-4WJhpR18 zu-#aXKxxM#=K{o(2nG=W_r2PVY2FMr4?7f_lGq@V@n)NSNP&{^0b^4nRSzAYWb~`( z0*}u7jI9gQZ{mQDsPScmy}cXr;7>`InvK%k{{BG^5bcf=qnVxXZBPLzBt1`u)q1tp zW}K?4SQbmfUc{E7j)B)xb0?E~aaEw~f8>uUM|32+Qe$(^+Ap7wF7#p_`e&dU%`EZO zjVK62_-En}X!HNWP&(~>iW{utmq{^T5o>>Z%y$S;%^x$0Wc z8Bt7c(7`~PC%^X0y>zh^f-m~h9+_G(KFET_yVoJaV@+U~DHBqyF+`58zjEjk7QSGj z_29I}VZ0sIAXc1G;@NHsec7S8bM5~OfvmVC=Um_+<kQ)c}?50e0S7ytts5-pll>tOHEa$laxZfGv5GVV4#_&6mO9f&C|tM&dj8K#pp zNz~_D_JiaM^GkP{`}3olfF|TSlKIwPnr}fx<#}7>$=WXS!%(bRf|vRsnxx|f{FH*4 zK>km?ypY87=04Zl_jGd@trCOV3$@C zc*5r|+38w7umz9qeRexc?wW{(7fw(u>*++ov(r|z13$S;`V4(X(!8(MSS0(HVLsYQ`;YFOv1_b=!wRB7ZU0x5;hzZEG|0iEW!4DG%7 zPS^4qVg-uUk2;KJe2sE33NiVPa>aZ>fwNBySCQ z2~|nRymKa~-6pUpn)P6nap%sMbT{sGHN z2mLC*H-BrsWqRLc`Qsh(`x#`TC(zMtcbRp;Z@&I&;SZYN#Xc9EqYfvDcCq=go^wJ4 zv81Z?OfeovNE0{}qd6H~kb(4jRS0l-*)w9AVQjWOLn7TL2%FL~?ytYT$}Kf?f2^~p z4r-YaS=nAch$^}>sqUITVJ)ofTJF-eslm+__+Be^maG$DKwvXFY zh@F@dkKivL6qg3d&?(8Y?p;MgKsOsI7mcc+?s0Q|6e_=14)M}OEEjsulTx8De(n#9{!H<%^qFPjTp)vv zY&yth)vM+W)AU&C=%n*mid0i5^=}^Z3rYsTJyP;pL?khdjwWZQX)lCGeo9u3p_R%z54XI|9*tgZY2w>}bLHYTds|h5n%%2*^2P?Ig{FNr zoPf$e1bsqk?Jn+g{*?Oh{)jtzR7M62q%on)Ov4vx6Cs8O_baI(7L~(A8(;ZwV_0&m zhL{z;JP~oGJ{Okv&=aUMZrr^V(}M0^bIuQ_K4K79ZDy>eizYb14pKOaB4*Vwk$i4< z!Cim-=Ez3wi^;Y_5sts*8ph}jG- z*APB_sHTyE@;t$j9D3!C=ATGL)ZA&30lLp3}*&aOPouIK`vvjs>KYhQM`q>DVrBNszJBv}BZ+uq9Db32w>16q?T{Z&X$LQcd#JvA64?^ubTOx@7b1_Z4d4%AH`3JQ-m@!zLuA2gio961HC+kg&_g)C2zISBqmRmYhZrZZe;e1uHk1X~%vr@3BJ zO3nP~*GyJ=g8^(oR@MLa+*pc6FS`XH>AlL*JOJ2)1+l#`+wf@P9(~F73JPlJl7tVZ zl=ZQlq0Ydo=|?l%61&`5Z&iNv_PNky*+}$U^5}z~6dueQ!77U|dHid#jrNPti)Ob4 z(fc7PXt(Z>H{n6pkdh#LF5sXV5H%cKHX%@4)+~=PEc5A$`yaFGaZOE-TH9 zml4Zh9G1fZq}KE7?hoZ2TIdYLz}7-ZqNt$cxgSP>HcqjWXW8rh1K94ebt)b@fW{ri z1CEzS>@5F1d$nWuo>*m=zM8Du;JcA%{B=p~xi+y)5>-ErptTqLS!|!F!Y|@Xy%2L! zQ9Hu6!;bfIrmd^S_H$HfaV4!qcL4hW^=1Ir0Cqaj)@n>o zGm0OaUC+xB;J2WBUY+z|K0r}~FJ8Qu8CbZ#V&v!LjALqgBRuWN>?zqiqqVTnubKq; zfmniFo6l3s99`K@s4}} zPqh-fGn$KCFA%HXsAhqWza@7qq_JXFe@S>5b#qW;HD;`3Sw#Xa9?oIp2>>_SggpM! 
zZ?{#RSg6O$xMymW*op-T8;_1ylbq$W0KfOAII6YYj=a+)zV$24N_u&gH__17h_xAwQPn?beSH4%xW&4#M$gJyJh3flJCDek6((( z%AJ}x1ec3FcWA8Q0qC8&4YuB#MgO)+65HPU^?jqs!R%lNaQBe!-P0Gcl8q_mI5l$A zCQ|NRaHMa@{turm~A;m2E^zrn!hWuJ+t6$2;FLPNu zQZl}E;`kc)k|Ar^Wj%AO3zxAMWlHVoSx%F>>tCXNCUy z8r*+cw9}*kIBqP2^;di=C_jzGSI9#;71>&_sLpi3Vm*?qGoU0X$yXU}5~8-R1#?fm zS#P%JH`wp`2Sq_?Hz@Ig+Y)Jz(TfzJ5=BAhrosJ3-j4n=5vSxwH__-!-=s@e7v?ob;tj^>h#=*%; zZG3C!vh!QM1JTsk+3RdE;4+{Xam$gcEU(2c3Nz6%DE|qv0veRhI+g zJOo~JOhU^1tW-{oJ)3>V4VN!F61C(?R4&J`EHxZh9ViUQpy11&H0xTSfS)7w9gPfKj>Yv@S2-y^^2lN+= zL0xD+qTWs4n#h)^GH4AN2)Xuvx#`eUc>&bYtJG#zFdmz}ebvT+YFuoTONX6Hgr4SOl2!gcu6GYruE0~q#R zbqS2?ZRAC+;_GzqDVG0EW6?H{3Rj_9tH|+pgB4s}@CNQ8CC~yjsNB4}Am10gi%&Z` zlLfZ$*HQS?5JBZ*vIb8W`s3%q$Za5R>&qxrg-Zry!(Im#wjuWpGLqp3f5v^2FJH_} zTo^kCwEIa`F_~vW2HAXT=-vcxDa>kSwZ+EYJ7yek=y&wXHo(OrQpFA{&o+DT^tG$} zWUQ5c{rVm*ZoaRME5WgCGvfC&Q0YcHj;h`+H}+P;>CQH|N`O{PRTzQ&rK;a1Ukj@n zD>ar0e~I(i*27O0SB4r1+q)+Mp=h$)9t&}sf6(Z7L}8ebR|L$f2TvPw39s4MjSVD}%EpLQwKhjnB9wH7EtYCCG}uwG#U;nIFY>1JRyyNS z^Fx%1HGC?U&tsM~Pf8RUuw@m7i7)ACDk_8y@M{i2hY)E_QIw#wFatk%60C>XV8W@L z-sPNC*VDy5fY{Y+#fhQsllRQe%%$X8T+Vuo5tr^~^=?9dNj>&0>4nqHbGeAB-)FAT zrdxZco81(34>a{RblC+Ql7UBm&_p_n4h3>~4fVd-I~zDSK>0sG2yR>}EQAg*GA7ui zmWCDd^TDWrdvUVp@NYwk!cx5k0LOQi6Oi9nWuM>uK_fqG&iAr^2(2e)ao6cP*)6z} zyGB@m-x|=4ICG%}9i2f%`Av6t+Z-u2#UN8?hBY}Xu`5k9G+w7zsMjS`g%qkVLrT-U zaR*n>gFH#z6_NIu5cS-LEF*uH-af0GO9a+ed`*qmhMjr*LF2b&3bK0Xw<1VsUt1tD zX_+uj`5JAGywg|CoUS)8#6)|J^9EAkyv+QJnVx##(cCvCDX9d@b|fEeb}}>!#wlbU zI@`V~cT}txZpBjr+9VsZ91HFrPiek^(@`7dYrFkH%UzMJK8&q79psgh*FTbQ^q$XR zg)8QHPf$8|)Xt5v)!!$jiiW880G7@1NW|F0y9_Ph+G9u0Y&2=DNG`i@FDGPVHu3)%H|1 zvZBGBG_a;`Tp8IjffN_**HP5ZI{xpu>u6|r%7owQRXv?ScLbeML zT5+KW*i+2G-ZDhZB2o+9dBdNYy#3_4-9)Jb7qvC!`Q|{9sGpHpxuUG( zN=XjO>-r!+plFsStGrQ>fx5Bk@?Tl9Kx1TmC>A%R3YfBX-ROxdV5ut0_La$SvYx8L zQm!hWW|}qdx&;LdOs7CuP(sR-Oo}O4NjSMn#V2S9#7pHP{0pkZf-2`lQAs=cSK;qs z#!sMv`2Hdkqva?|7$ABk?8_)XgvCvnW=Qx$Vjy-`ZK*`@SQ7@rWB@?i?;Kj1axQZ! 
z&%GaYXqH9AeXBf4bw+zmavsp+mZjQ@Mc8aUZq$6iXDh0b!Oa_|u3|xfx7y*|{{c!v z!(}sZ=WOWA6ghmX&aIIXfrHY8U)t1?HN1!sEeHcTeJW|gFBo&@&4kcl$O$ydg7vbjL zJBUV$Yrze<=}akN0UM!)jL!2oJ=_4rS_C<#-1cZ4Dce~^*w4epN_X3!&{TIlJ$@E?B^9MkEf#2|cob9EwUiP#cMeZY zt})^Q5rRfW7MNdKW|(Nrv<~fu)DKqv9u5+Y=SZoG_VI0(`h*?zarQ#OP&?$i1G}oc z(S(Cz?NF<7p6*(ipfH&?P_}lGtl89xj*x7TgWcjmfJh~0!$vZvd4SO9Il42_czdO^ zlDF8sA!J%?zvk7E0hX6>Gu(c5+r^Ee=wn*KSFTjPkG~T+7!njb@%bYASR|T-iVvSX z)YGw_^Kf(x=-w9R3iJg9r;EwjP}t);sLz(py?)0~TI*@91t+3t|8lOyU4+%zQ&r;v z3AjEQE3u%-+$jGdq&BSXcC^B=6_be=S`%_O6q^Z~(TxIU)|03LVvAXd9Mo3kUdyKt zKa{RgcEQq{_(WI&ZSg9bngd~?42>p57nowZ?K>JJ%^XA7dam|L7^9UlG$|Ptk^AkK zQ21z{bTg;7ODMrSQ%tbySD*X9PlV4Mf5nn@>OhY>+g+VmmTi4vg67&a6&ucTa%-lc z(UwVx;t8>{h~`MhfW0B+@f3S`K2?+@N~4LlCdq>GS+?T>Tpx_{{7Btz_LW#wWm34q zGv>-l1kPh@5IuSi{_j50BS~|KE$?Y%P66d#jbadnv~Je=rmKx{#;dQV_dP~%QABx` z2w_%zc5^p(CYS3hl~0sjGfmQJ;7I@WGBs9A98u(&jKx;d|Mto2f*L7`c8AVx(@42EO4&ZO#HEQvx$mvXcP z#k0dLVT*`j!H=||nHxwDYyy?B1ag4co8}?}a>_o}r6iut)k+`CBAE9zA%D=+-OA{M zgQP`n*eP=UpxM<$hiO}$o_3M5s}h11Z-U}0K|8=ycUboq%k4^j5ye^bdT}|q5hWU3 zwbXr_Z-%de+-Lp6FR0fRezKviGbxbIMO&wQLZ0B<@&09q)@4r6qLM2Vw?)vb56&eg z!SvD$$o0&&4#jI|8Du#SMVEZ5Gmzn;W$~i_+b4XIjsBqKuOq&l;;v(YcA3r97a^?< zqx>H#jTRddx~eaO#4O#3buoCIu>8`vh}<&4B-bt~LaUjnyoJ-pA#(o8R zT5>DI$*X0S3b8PZdUwJ3UN#)H-mG}d)hV6mnHcuM_!#wHR*T-rni`RQBvmpmGC!mB zWjd(XJ>Dt>R$k|n&x)m)3~>f!Pywb2bx$tIt6*y?f*$Lw_Ax0Dz(w^)a6y&UO@!tA zAb<4d-+uQ4ONhboGZ*nePBTC9WpXOXj zU8(T4=zBVpu!a|@|h zTNt1j+RtSb8TmWcGtsVW1{kikbkHd|Re4QO%V^Hx{k+U}b)O|w9P$V1Hxye?te0Y( zamfjW@s8;5VX^;+nO1zB!DoXGaEkWj^C(_Nw?G9AoBMp~GRb`7HZh*dX0UPguL(@4b7Prf@NL9Q%(q4Y zZg#4h5;3EhKAFC!{}>&#Z?C78`ZV+b8_2pU;;;IBBC#0O)+$$;!iw%@lkS;B!qm;u z6@)wDzM2Hc7LTn7KXukh-_|}V5lw7YD)w{08nlG+FXH+6t*rC~>1heqzrDlhSo;~_ z;Fa3bXyY8(>n6h%f~jaXg!gQhp(npxD^ z%DDEF!NW3HceqG8G-^(ue4HLSnnO2Fs~*o^krx>(w@$~J%TS3shcV=8VZ`E4&i0{ay`k`A6N4$~Bkjo85N*mgzKUP`)F<&GMq z1-E;HEVgxwjAz1K#OfPqOn3%;_wf2_YU8hPbC(NDa#46wMKOHg!zGZlxTc1BuvpxI zKa86#nTnS!qr9XmLP4GalJ*BpDl3WYp}Bo6tfF0L6Dd>_RdEo&&F+GaZoX6tIS?DfpIKu&m^~#+R>Iv-8W0u2emwM}w2(-)c!23E`v&foX~YqWa{N`+ zRCNokm0eS2n&NqZ4ev3lGw0i(J=@ADEBbINe>3rmV?cgF}5cASG z!#~waB`S%7oEaN`cG0H%5}&M5Y@jUyf9^Vi!(0=f^mp$IQIXtnZv_&)B8Po$ zJ1N^#fDqGgR?A)7|LnCDiu_N1dyV}Mnj*Xs@dqs`GUw+G`U%vFNz`i0_W;MTJd*SK zT9i$vjqHHEffzeCdx6jIpW!1_dhxfMpQ2b#hmWwyXIl-+5M&+jIT7l_@2%vY6pUap z>Icmo5vAwa_Ln01inCS65k|+a7VG_nj#$!bEW+y-nQB76sly?Jmu4TVJQ&={tus+BV$))n0 zE(x!?0VqkJW!X&VFh}!z_3rBiFly6|aoT5af94OvGjZwzI5iAq& zA<@2{P!y|94&3D!HpUrlTM1w(#U8yvhme7#5S_xeW-CG~{{2nMhv>c}K@LkslN}57 zh0iwW3@1dr7V$?C>q2S)ZVQ?4xBW%t)k$P5^h=aTx`!th71rPQCwRv7qRxXUrujJs z*%p6AM~F;N`>WCCDv6!=o4rr=)l7B9D6Cp{q+=_!I*idPsk%M0j28Hwn$UwsDM-@2 z)%B0U<;v8)s%+(V&a8x?mycmJD=UnjwlA}0rGeg+6*TlC_$d!5dt#`LT#uL(S4s(L zYP#@5K~?B5i_Ht20(-Z&G@Tsa*w_VFshFNN~-X$`~rU&oIk*qpthez! zz(>&&g*QdWLxV`P^`Knl9`eVf)|Cdi=<%#mLkApm>!ggYx{h0M13hPQ>!;0|ar-Ns zYs)4tueHb)jaDrWo~%K+lF5-Vu-OE-5frhEN?>*E<;ZH6wIoqzxiMgY#JS*Ib5;9b z7xOaLsY2VdxOfRdScWRnRLf=(=2Lln&r8f8H znb7n*B@3<5xw&$+gn-8y3YfM{Id@l(xNORIG8cpAoIoYr9k!l>Et-@`UqjuT zDU?w!>7e2J25+x!`*g<>gWXhnU64?$7Ry9-kan52d3b$K7y$`CVbV}#^&$peXm@&- zH21Dr=9!iMs_QGR)4u|Y-BB(sy74_er&+n{aT(~T3lGln)$&o5cvk9=bSBrN%4_*^ zk60lzhU2iRNQWJRzGwA{_TfZT!n~;84KuE6$6C+Y%+h2DoRl5)ej8|~t-8t*?8opj zjmFsvVyh+_5#3?TP=+qUeB(`Dm#aaE>g(QRx(#j>=@gX{-mBP`o0)Q3@l=IUBp!u0 z3@V6BOTj}cVDMB`Fs8T%XRgAnERK3c512%91mopK<@xersQvM9wHHQf-@EDrm*Y}? 
z7ymEPdk7w_IGQ#k6Ski~A)|@e(nQLH12Oj@sF4d)n50=^1 zw+q{DFAGL_v*cwvPW&y(EQ{#3Kb6C)_}T63MinK@W3*C47#V)VsIx$Z%!P0Z+y^%Q zb{w*9ep#>-5yT<&)@@1M_z+cDqsapWBOl7oljMM`np51&j$56E_IkpN${z>#trv+3 zZGl;4$2#mG-56{^IjZyR{0)LZ!%})~o9YaQ(~h+@T!NN>`wDjl`4m9H?_-9_?M|IC zoq>j7Wht-I4hy&OVWA%Jw^sNeky^nPlQe>xeJA4+RM#Fi&852?ex5w;He3^~k4F`N zpIPD`Iq_p2`98rwgxY$CTJo)G{qpi9bBUJX2A0ulGVF5w^gRq7+_%R^5<&`A{Y%e` z4|ffkn#1($29ZC$oiijG$T`gy61`es->P!PT^Qt>uuA0*e&r-LFOSZvn)G+)WhJ(I zm+_py!)M4`1%L3L6-IXA<5OK_S_B;`VXAUKGu&=#s;*4H?z>58v@Yh-vZx-nfcXdfNN=att zPB)0$^k^5YN}7LEXM!k_hLWqZrF1F0Pc}BLz0=m&pRRmeKEiA4SdVWT9;F9VIq?}2 zty8K#s!vRu$pTYbX>GmA8$UJ#DAe58gcz+MRe-p`F+rGvgVZv`4KsB~b;`TU}03-7^xaoHy4X zlEXyz*W@f6&vtka+H})R*uno|&G>&l`ycTG<94K;>ggm5u0K4Tb8^Iep!t0&!3CA3 zsT*$NixKLeo5R)jvw{(0rmpiFIeYaG58XqO>9%UJ>cbp1-oHfjoT6d5#pu?{3oMS*4+*M7nRLxU!Uso$te1Z8lr@ zR_{E6@@pkv3zcZUP9u^-&iO`>^+nvVE zXMMhOKR6;cmxV!0m)Si_q^qop(u?)i4o|JzR6g}P*NwX*i#he}`R z7fRx0ea?OJS3L}pu_q=1d;W#h0y&@eXtW!zipm{X&dsz+JgjH1LFM{(GnWzad{}JNc2Yyr>{(d zNXX6$HrK2j;XX9rrTS=b#4vIrc07gqa;J1TlV3Q*oIo(8>9$f`KS%t|&yN$M518n) z;-;o6!|XIlmUuw3zRv=q(Aa0ueo^o_M$<35#O!Wmei1KYjNE%!z}Pw4hOSh&C8bah zp}WQG7Y<%%O|D*UYvv>_%aHeyRCBNUYD1fT`HcDfUqSzC>BA#-ud5G7t=K9Y1jf7) zp*=|0Ho&&$k39F90d0sc7QdS2OpMD>GFZPdGNdj&AKH?uc+&WTHcF1Oas5N>UxTH~ zaP}EG*Nc+kqt$9a*yO&Qt;&WQcnkS?{rcS8IiGmB>v-C1tl=bOov zj1t3b_RG8qw`ajjTE4nfT)y z$AdaHzUvv%1Frp<6j6z@I-u`I9ezRWSSLlY9X+8Li7W`y&6EFR+ z=*foB;B4)jH@PQ{b+yi@1>ZecvjgG=(PW)vi5z57q-~O?E3yIw)K2AH^dl>KH5d<2 zEr6bwU8O}BYtFUttBwOD3zuLGnLeR~7h(r(p;cpuAK(HecuD|Ai=NgMW`s|hzvx%z6 z;s7A+ba?x@?)hSbT6KjHi??sF2o&`NhbweSCK|$PmT$O@=!$lmqv)Bu>Hy|%9V$rDT1sUd*jgSZr>94eKTRCqTBOWJ6c0q**cB!1X#b`=XpjU{H37bfzj%4}b`cV12E6h zEK4-TDoYgDrK$Rkf~T8cV+$*8fcllw%Ngpx{xrcQ5uYPa^WTUkjYAGdYs2Y$H8(A? z+nNgQQg6I0yQtsaVfVIAWMn!&bfP+OzrppZIIsy9@(}60`d;grC!+FNZQ?Gpw5>re ztx6+ot~C)+xi@a8_DDPrd@=#-1xAHBIMnmX?1I}PBt4pOLv4L5=&+@1P5TpKH)dCa zn-<+Y$OeNG5z_YQ;nA|%^u`SqJkX{^u ztpz5-*F+Kiatw>ppptc(QY|;#HU3ukD9gBw#_4>8pPAj=_T~N`G&Y@vR+;zxM?3Nn z`Sy=W+0xu6X3gN6IlG&FCzdyBCM`FInu&^J!eNO9KyrSaj5a*RvX;&>_8sKOMwQbs zKJS2UX6~KSwPj@8@snAe<6z=JQ{)lwp}t%sO1kzk-G{iwzQulz5jm@?rin3JFfoCY zjZW8=hg|E(hvkY$s&YQS3wJx=zCG!;<;Le#W$^vNrE$l_9=k<%)s|}!hjNDuQJtXq zjGK?vt83N=TNmva9&(#j zy{EqB!NueD7SYg8N^PM@X~D08nlJ1i!s{%X6Cu{7FKOK5DMs)x?S+X;7#3|%IB)P zFfJ$CT!GBAT2sAr)^Am`5`s2TKNfI5g=i_xj8`}-%dS|HDs?-negcoSd#I8|p2}_6 ze~_llyQwVJwLD(ks1yX^BoLG8P9-#`vhaF*gSSPpe9n%7P`TP)rZ70)7xnbcIn06+t<=5Bs?4 zLQf?f|J%G^>o3#Y)`O>?OO$dqY_{?ZY?qUI@Ah$f3`ZTU1(zF7y=&a-L-I?wNpH6b zQ^TSm_KkMgnQZXfZ}SsWjatkN#=6x-y0Hlcli1C|#ydF(u27;txg{Koa9>`7z=Z z0r7{MrN{}t?Zyu`Q;F*f9Vj<{AID2k{EpgJv(1ia!`y9AL%qH)K zC1}05S5l*=$p7Zs2Ppxsv_-mvPOkbL_?wonutwUvUpmQAt22wt%#T_Lb!LEK*$%+> zIn`n?*PD$^Zd8;au`9{jpbcU3i(F!nA&w17+~|!HLVtg*MXITRu#~E`utTodZ{?8R z;!~BEj@Y5?lP&EImfCX}nW-S&@bcR13~%bURod(>F{H!wi!CmvE1-JCCa-d*iiZk% zR(LXg;1umUj~8Sf{SKh0b`Mg5YxTKH`-<{wQE%PGvCggOazqg-w;ua!Q&|usyMpJGHJ{T6x?~+CyDo+(5t)q-WHK8ao;}G-S z0*H7nlje}h?_YZi(CA3BJ#S>|=A=-)DAJ~=0aJX=%fpbUvzuAjjUKl9U`B|FCuVT=^Ngve&*hpRBg<713sO|Qq`WACrj zkPWxK<+Xw`D4@vEi+ox+r5G35_C>#5`Y$8eia*qVvaJ zGE$`(#c8bG5)HE13s%d5+$BeY3mFJHj(w!tPy$RbX+-P;Ni3=(b(0k* z9Kr3!JUE$(qmX{JiPONA0}y7pxbb*R$;HQ_%~&;$kUBw*A#HZ?M^js=@KU4#qZBW3 z&>iNcN@=i|=mhV{;!;8v6dAO`I>=O&Reb#SbbPUcKnIow~KoHE&uK zLInJy2s@)%*S_RN@J-^hLZcLi<@xZo^Nsq~W?r+Ppot$#f*BW0V>>lGPPDyQcAej( z1_*UZ>^^yC&$RhVlw~>IL}4T>yxKuscwhZNv;LZ#FXvBLf$vivNqC9l)Y87 zA8~YXGnhW;{h}N(*rH=)W*5d|PUK>#-K$V6DN!l{R@wXph@0?GW6hU(D-ply(TbmS%R~|`XxayWN zR_^-AR^0`NZbn#3UVA$=9pe(T_?!Az4CY`8_SLxTRi%B!2MkFUq2*0I7M(5BtFd*L?+)kH&Np!Wzkh0Ac6*kdZ+0UBlRJ|iI 
zR32~^*z%9<=YKgJ{d$FEb?YkyEfK{SElN_(53z|AK{XA60V(Yj*~heH5B*Is&wCHwr=Ak(D^H zM?&IF&5HcxjDirgH%TXggU*UA!wQ#lG-c{%P^Abpa4!9S?ZQ=uFa-%c>c~i2Z}%TG zO!D0o{ipo}OR@fulivc$*vz}c4qCz=!Jn*a0)FxAwG3#Q)H9$s${|+O=nR%cXgH1h*O2HtlPXiOE@#lKeee z;5AvuLA1p!n#ys*&M=+2on5v(aMyohIEi@j^7f<0wb(ob3g9QL;ijm-LrTfdNX?F- zn4g{;_6!~p849Ns{(A++!t;tks37Tn$4A;F<7?gW`ymdrKpHODQv=NL`%e63n%HrCAFO}^Ks z_#oV!=YA!)(mE#?3owm%C#C-+9%hnI#J(odyq;2t50HUep21o-GkLL^TI;Ge_6;+) zXXQ;gr068`fTNHOVHGHoi>VVOc&5qW6I(CyNMW>|`y%-Qw1D!5e}2bFUPC~tm-zgJ z6sy^%8zcIqfS33=nNV`7<6xwDwteXG%{+w&V6=bTvh0K zhz|Ck#fiS5S|4~57$LchS^6}a;c+xu&lv<)zeUN%-6>Pz)3 zXP=-Z8|X6*>hyQ=w(ywveL6BPRQ!yalpoCkb((el${^4E$0~QOKqKz0_IjnQ7tdtD z96251(hBAzHQ=vaa%96ELFNuPsbi;fpb$U42MQjEI zQrDSZ<<_=4x%{nICkF@A=UZ=zJHJS7w`4kh#7?Ufg0?<9=y*8xCB?1w#u1ufu0{S zk|aumR3CAWkqT=hPO*-_vj@Cwy=|5|3u>Q&Vz5ZKE;??Or$0oPJg1Co%XfBJ&ny8W zKxTf*v}Qwpeg8feOmofU!2(FX;_Q60;XL zd_8A9W3ukty*BZcOU|+9W)n!hoMoh&>qHZP1+0X}X)vQonqST=nd{K#vG7c;Juq@t}cV+yGM#v^*meh%GqGvzD^(y_Io=>j)5 zyuBh3Bj&}* zbOeX&;PmF=RxghdL&>gr+@YvzN*`Vlc6FPcn@h~1#y+5Tl>dDs>n?#~nVkgVlL_5* z=A^pZi_j{ztVWB6l3NB+G7OS~FaIdNPZKuY#uEtn4ZGZ!ML(hP#GzUGa9I~ny?Dn7 z52PO*3gCB4PpUYo(>I(qA{yVT(;_R~;vgm!=HusJkHF9m1!ZF!o6%CqPXlH8^HwZ>+$A=vbp_*FEy`HR&H7_U#KSA_SY zy$j4|DWyJoPbVp!iL*+e&(+SB^*;EiOi3>hp$_6%QE6QmPSSi(d=CtgW)t{qlil?| z>3xhOwe<`B1Sf5a8x~cI7^wYvsGS#hKDZw7S2=JSv6ip3#UVSrB!D#+)nV$O;RnD| z*HNot&eun{_AzR24;Q%8y|mt41ljzLwIs5!r-L%)t?S{m%3|v&4B1+%dkWL{qxIj| zZ%_6IDgcAe5a_j%eUev%_8Dq635$pb5AAP*_!iBB-q3L5%HoC1Qu~<5n1?Az`#C7l zj5zL9lB^RRQxMr{lu@o_m0D+398w}a*&-e`-T~4h9MQGOW|=&qB$$ zfGI)h=F5-i#X=L0@*)h1FphcY)8cMhxe}~`m;J-!25_Cjaa@Y?tTJ3@Z%$mn+?}jV zlA#?I=~QNeIjp@l9;dWS<{ubQLoLpk!-_=mA8Nb%mkfEoUjfr2;y|huZnXyXl%1)y z$oNE?%GNOdp4r}Qf|GEg_ioyONRw@PBER)MGjZ9s6;-_ci-?z>1P0MSwFq{t4*@CDpzG`2df75Ygt2VZ&q<^;o zWA_TOe;oV4JD%qrL{i7dlUF$toPju!Z~ZH&4X+^*aePVa!3|qzwp7*Qc=fvOplp;) zjyCODn}Y@p=<~#Gwz7x`rKDEYMriF$wSV)+kA=fp3Wd@BU@<3st?dZQ)dR@czLO9~ zBs^b1bjY@iQK1!UvNs3%8d=(uXoAm>;I;t~o*!is@CJ0xo*KYc6WLZP-gr7X!INo& zLV?JtJUM54y=v=~+QXgTSzbA|^aK&ZVF9-xKPU;a{g3*`#2@gA5AulGTR8&WHLW2! zL|~AFRF9G1aS>jgUlXYZ*4BnqPDuW6dzzgUI^0Sfl>)E|bc6ni_IhgW9?`5lpRvM{ z|FN}|DLrQB2&ABRNcGTW%(i7Qf=q}%A!>Ap0^~*BsIJ1X?njj?)Wm)Y2QSMv&*fvn9OowEb0Ayp0En>980X5Y}zb8hvgchD2l6Czo zho;Oqr*J8t-)m*ABz^2L5$e@X{9uhVTncor7JGkn1=8mm6Ro}=0)uMejOBw_ffYEh zU&DL619|Kw#UlwLB-yi2=a+ZhdlZtR*pSvBV3a4<9lPOtz!nV8|6O$`;B`+njivd5 zF>p?f3#fsV_J(vxlOI0R(NlocbAm8En#C}c2tKU$#HC+=CV1}ON=W9Z?jIGbqZ*x! 
zGW30Uj9TO-4)vPKq65ieMolb3O%a(7~-JG27q9Dgs0tF^JaDwKGMgSdzrTb*FJ2b-tH2r{Gvi7mFxi zZ>SAU*OFq;-13ho=;!boJjp@PjPXlr>iIsy8SZH z;HLM$a3}CPnDwBe7=R49ySqDT>tY zVqWf7^lPQGL%$Bkkp-XP?QRy7aKhuf)6zH+mEZQ-MVt9 z^F~RJw}!kt1!fNDR5&xtgv2KX>UeXL;2%bi6*99a;>yRHi9(#SO8DWWjA75ZT~>I^ z-Z#wjK&BSN+Hc$n>MU7k^j&lfCBAEl-Imd#+5q&A0pFRWlX9ivybP6NXvcx>um37Psvf8&!>MJNEtZHH-Kqz=w#&jvGx`^TOvHP~7TKTe39-ZIN z)@8_4$syI%Vj1_AQA9rb3s>Z8?ah(HR%Y|ng4K=X$$;l=#vL|2{ z^>9#egD_89@ek02@Hk;ZfGv&si8sL2;&f6eHP&9Ek)IXK9|HN!U`!)BXQeq~-07EP zLKsE&4!Nx6_eAX?y)>Ps<`}F`HWb;7__+0PTBo{!lVoqP9rj5{cWGd)QBpoe(&g)Z zt6|azWl`c=nkPKp)Qm769dh%h+jroR+tc7YOci6X?WKHCjrm@^7*81ad%d!_Nf`_H%uD@H9&?mV`+BfF)*A{ob z5%}HX*H-Au(@^j7fG4230Lp7onB{A@z7ya4N;#R+eTT1VUBY?ZV$c4;UGRE^=nKTO zEeQT)3ZHrNsv)TV500GUC`_y4BxRAPGvF#85N57D4qIkv<(EMB*GObHwzu2ah8cOi z$aYe_G44prH0TBD(cMw1+Wvj*%JC0wxBthJL92z`9X8tSO+Z3VJGL1(jpA^(I&W0b z5GpBXrOWm`Ow!IP^X)(;n;m^Cd=O&3#7{AH@R(CL5i>(29b++1ooPAIsMib^;#}Z9 zwwzn7xL{-H*NJATD!ISwBZ~hk7NMA8AezMwhnBG5TU%4U_);2F`l9(x0h7Rq_vyD-+jGS#OxVrMUZV99M+gSjxS=*g6VR^AgOy zJDfIM#nOE;Ts_k+N2MmGSHq-;ypz=?rb;<7SP6^cZ5CU?B^7tSS~fKGADfbw``+4hN)|h-1PAczT0Y#xSG3<8L43{J>%* z$1)5V5P`|2`B`C`-f5p$)E>HzH~NYEO;;fz?z;^tIG<{y1a!B9oBi-zFk`f~-F#}g zi#xb7s3my$KBDBmqwW9V5edd)*V+%#TPthi=b0A*7n);#5VvvX<}Lp|G6E1K1%R%qt$;Wa^n#2u3gkj=Vmw=MAolc zuWNE|lp2y?5nI0(#GJw*SFYq8hI2Hf{ql94tI3<*%ZekmAiAWZhQ*vpv?(SVrG;z) z|78J~BO9CAozUD}DQb=b=bTqDL$dXvd)aqw!+)bgE=5*@#5rjX;WoU55`8aE79Ys< z4ki3C!NFnI5^M0Ip?cRwqciLV0JU6uUxF^P;GVCQqqPw0GU4j(sueCIzq~0mmrU$1 zmnszP6&|Qcsr4LVM$wwt&x|_R96xv4S}-|rnaV-119g9$BepJI_L{?I3y$+5JSL{` zg3bJK%5IBl`G`_?qW`zUcM<_`&Iy2yCl46j%m=rLB{ zPWLC61};48PESrCfjA6E&0caIoIl9%xsDQ| z^b^empS^McCabQzYjI)g-D_JAP?6pdlouiWn84R&Tr)NUmCfRFqKs9vY>2UN%DgXHlx}xu0n!nUiv6 zq8PK?+>p^h1{DFn7qTihZNq+l2KOH`n=-Reh29}$qcZMlngPWQ-G4~$qWry*;4fj& z$hX@Lbi@bkP|(PjD4ovsVDTP)K(I-AfY;H{Q98w`4$Ne;N3iPa?WxE*5N0NL^6jLyjCH-gS~!W2F~E5t~^XMtG}C1)uQIe-dyQ43c5lkrjR6_(s@Sey&DHzk#Y@k zjiv_OdF>K7yDIaDhT1eNOg2&KsZD?Vl9rh!8f_caq{(;6mWKmeMx;dKhy1^$Ycz~X zGn+zOmPIDL(_10UoyQ3>uQ-o3+Dw#DP#qpJcAoaV97;W`9ZfaRF`msH7N4-o(|kh2 zf3Ks6>gJ1=v3^{=dT;!nnhjXKtv8;(TCa8r7}=0>;^5c_BlYIks5|D@U3>?RwGFa;_w87)+=X`W zGkd^4Hh@Qu}n(Gcf_53?;tW}hr^RK9m-{i`D^UVo}bX*3_<^Wv@$PK zU}bK^Eykl=fs{M{&4`Hl8@}|EHfc+VShVpp))BW+kkJ1W{P|`sRJXh-8W|urg z7Q9={u-N51I&L|G<-)0#iSSbj&+E|^7EI&JjWFY$08{=`z7*F(3^4!jGY-mC)+{w0 zTHwR2;<9+Psjs+4g}#^_2U;|d?bhr&*$B^dO7kgGK7W5DkwEiue$^q?E&(haBNBAt zqs{sJn%y&8`!d|V*01iQ(q&FN_v`delQ{c1J>R5~DsUUQ)IMeZ3i*p_QVXtz?x95bI~`8SnTQP8Ufc7cXuoXk(F(MCI4=N0&OV*o^!NlKluhFw z^j0-CxrvM#X6&uW8O!J34`0c`hm5GqBqnS%akw*pHfcQgGS(mZJ~|gHzWuVbVsR6v z-R84Ged30hHkwo(^>Ra2A9hra9!gp7+*Aq;jpy2pR4N%*V1471Z?JbnPSLLeW6L@c z240EpG#2xBT6-O#6&(1^^LKjAKqZI1zr z9il%fkA5lGVy6S*7m;uaHtI7(jLb3% zag9+%+NQsN_jS!#EpP;$F40-|nf~k&m-;n>)&)uf{!hy9q2W~?$n5wZwkCeV$zqEKR+Tu5U(qD(j5IgER`S(!|s%^B( zPP*5%)ucsRcAh&U_i24Pt+LwefwqsxB#Ae_+BAPP4Zu)$Mq`d|G}RvP769Z^!&(yTHveTYq<(JH?Zl8H8BQ1tFoF{bjtdTLKTs&}l{ngB8R{>aN zpa=$W_v_ow3(U)Zg5-wys38ERve$E(tvLEflu&KapNXgS$SijQ;4rm5I(Lmq^KHt< ztv-oidn`p3vv5eo3^g)U1R=uGTI?F^Yx@yVB?wH?`Pw?g@{9ft+;K>s%f+UM&XQCm zEOm_mE0^&zZWn6(j^Q%tq&#!7eZo4`ypmil41{qns(jiTN^?aq#f*hEs^dHC2SccP)oSsR6+uyXhF~#|<1p zl1`JP^L+zf<3p4TW{uDBCAguwT`ZEC3@x7;V%3P0yjG<%ar*e5(1i~9_GbPy3VQr^ z>wX{0vh(i!aq1!K_wAOew2KvDeHTz82<3P(lJ;N@9P8hkA+2`CWXh6sc9l+u+Lck5 z`r-kdQt&>Zu8QiJhQ7M$uzRRJtNn}xJ#1%w0k$)Les2EY|Jj+Z^?!EeS5p4po%sO) z)>BW@|2t&n^%jaha?&*=~IcoO}V}O{oE*Hhk)@(3GPI_u~q{4*q%la z7$QQlfFBs_=jts%HsnTjU@RJEB_LFx81v@sx8t=SW+H>gzF>mS6zfp2u%x@kqm2dJ z=lQuS=p9ArmR6Bz{c^FCY4D{G)5I4em*gX8jsyTiZ<^kzbKaHhR#sMnddE>YfXbjE zi#npC4f+Yc-PYD+ty=opZ49=n9EohsZRmA2maI+ADC=3wonwG)U)&00R2aB4`294% 
z#Fg*9`O~CWQ~T2+7;j2^e=x0iG-1-X(s(E5?Q~%}2g_0ex2En3Txy?qovHno>i~9N zf(739M^w!Z6Ao|Y0LgY0CrQ%Ozr+X##&hV(cWmdyw^3}=H75CX^6kdeO~}6aeLHA$ zx3{wFnjZFCuZJ(DuypOKz?cZU{iKD@6pR}F+$VS6FVjLG6Q&7cV%qM*v_6oW`GopP zVd}W`8ODO)+1Qzv5~pJsf?OuXnXiIz3&K-y_&VO3e7-OxU51(t)HlwLX@H)m{K}wq zQHh!F5uW`1Lwm!%Fh^W4F#AJO{i_9zlN7?ZwD>~4lvUM$P&WMPyU<;>C0e!2&_=dV zzzr~%u5ohFdA2weV?UEpdssz;N}0#TrUHh6FHTsM+O7*;46tciOJ+Zgzv*xBJFY1( zDh7DN&e0Fa#e%WHp7!vXWPNhdQtwJh_g8ziY`$))KXHZD-x86q??&LqoFv@=9B z6|L$Y`j0K1yX^4fZdYrR)Xhn7w=~Yge^WA~m|Q=i79A(otPyV*>>pN9z$^?7^DH5e zHP-L)d2&98v=0ht%=Q=ZKe^lCN%>`}DW`4k9r7Eq&}POgMsJ|Ejo=#37+M2Uq)`G= zA_Ysdj;Pk;;`~{*b$NZ8BMh?9Qh|@&n^l||uU`eYW*riiiBac+?Y1+0t*ptb*aJoD zB|sskFmg^2dzYEVVVmYv4fx{J{XDT+_H0FSGSrjL1_({Z^H%yyqWFwzE(q0jO>sp$ z69V~kW9Kt>sJ!=|>UfvTNhQa)zduAK83}?bv^6&Rl(lmy#tt8+js2ohlAue>)OOO0 z8Ee4|9tODy7+9aw2q1K$xo&+6T24`ulC$FUfA(MfW$hZGyt=)8)O)|Z6y%CQX84(z z?jpFqGw-@EZgSVnSKmGH^fyh+@K`Oep7Wt!Y7rwFgEr_2CI(sK@Z0rCCF3N`)2fJv zIs(5leiPodIzu-v)e)1HX9M*;=BBg6hH=H>RRs@qyxRd|*vHdX{; zeBg?|m(L(iwkgheK6;Kl@Qy7RDtIGr>OJ>I*K4w8&8LU?`SMl9V{XxqT!&A`}G4a16HJZmxe?M1K_$C;aK$o8>4sgtZS|F^=BAz#x3sSA%ec;epiN$}R`s9G^;QNY0+s*)06X z9(XfBMVR3bG50`+y7&vh*yko8hVmbNz*Xp1gD(W6qaqnpw9W3mC3z*}7^kv8xj56D_K#ujIc%c~7%=O$hr% zUd$w=LA0cw#p6wzGJg$(0O8rfd|z#Fns$yh*3lmWbrSX@GbTlGA9kzi=e;E=1HYCw z*9&~n{Ys){sg(tu9UG4`(yY_x4{25%PQ|9gjh`twq`nEpS0`%2*1<5!D@j<*#${_R zq%js?3I;k8ygmxiJ9Yk~H)6-h_oqtFr}BHupXjgh!rr^_pL_22mlq8&+A~zB_aL&A z6+Ka?G|44a>8AD_53zF3jhs6qFn<=}Z@ zDs?~XtGJifLXst}v>!KvuEMzCd)p;ibDMRS1IPzD754d3Truu@szEv#^l|%P7SH`x zXvm8I;PBFkoetEeiJVP#zu9Lz3k-wQx(H%7>-7g7wh3gVh;!5g9amYG`qGh9y=1|_ zlj`mK!fmq#*x-Si?UK<-pvI~zp(Fpyp}A8W>W6}HagimfIfhnAmS+`7LO@>?Ps8>l zAuOFdhVH{TAXo-p^FG7x?_6LuO3eAFZ#y^R!r!`}TxhCsDbFyiv$;W6E6r%|QMza4 zCX+hni!{b=fWp%&Smcn+Y(MIq1V-WV>Ye9ZY@8WvmmBcSWCfDzsVw7c_H7qFuYsUt z2V7VTzTa)KA&JuR%ztYuWx|}ve%B!}`xsK#`b2TIA*NLe&q)*3@Yo^cPJAIIB2og~ z!PE8`3y(rRqj^FJl(8EoI?hpetylL<6S71ZXF zKO=aIp595QlbN`kgO=EER`&JP_3Ku0Z*J*|5@`+%D|3ZOizgdob1)5dVCA>sAMM_( zSG05^oQN@hZI? z+M7+9K%Mm>!&6PyV|(S#Bm)k%P*47j4@d0g(V7~n0XX3w2N!g^M(O<~a4bOV9Y&co ztIF!*GW0Q>dEr~p0!$kmZ8jDP(}D3MYgN*>x4DnRk52#NV6(w^hw8ub62?TGZTqm* z%7Jk(3wEF7!9lTIPWrjbHXpHjGC0+d%)YCXmMh45+$D)i<_sdwoJCyPh?i|LW7ZOca}NDwjk62B&j41D7l~;}*8|ZriN%5oeydV5Fx9 znsr5?N8DqzU5kIxl{(wn`e_$ho42N-_DSjTveGqPmiv*AzATtrVmCNtf`{LAn$Fr~ ze$;4WnZcG3LCR32TD(C6fqqrW8Yp1CETBMpJdr%=`1ess%XbsSBZ8|&w$?a3+2&|R z)>_^fw!I}_T=v!II{j-;5r$SSZ7sb|^NNM>1vk@gc<}b2zj3m+-H=-$%?R`&ASz&= z;7qRyn9*oe>$Kn_*Tq5h!;S!kN62)FT{T7nRDd;=RYv>g*G_Mi^Rs{8?^AM83iq~` z2*%<1^&0?k2KvO^9^#-(73136OO2X(`<6nfd<2KW!T2WYQYgyDSJ)iztK`Q(b8+5k z!M-1T_2`KQ(>-*^mU@ZR=B4_=N;QhGQS#~r>r=S@eWZ71EH(BZ^1JEg_>N9+i1@?3VWDub$0DuT2!mu1MOTJS}Nf|SOfQ58Okbu+EE(Dv%2={E>X zqQ(Nv3N9?uPlRXzjhbG!UBgHHHE2ui+}!)231#L#1FO1I7rYJ2p9MfGs*Zw+yR4Vf(8&F@MZnFJOf-y%|=uqVi zc{;~%D-ZpGzNd=!lQ$hIq?fJ}4rn~mUL#t%*N0T!WK*60g#3f$Eob+Pw|*qbAzU;~ z^;3x%R9cJ4IWbHTQnOSR;y`I>=?b%W*vY1dKXvghAj;0;OslO1hMyKbbmkx2MrrbW zJJ*f;1zQVM-ZLDAj~Oa)x@TeD?21Vv`Fh--ffrnamYEr1Gfy2Ln);l!vbB)+ChH4H zA20Mx13%7b0)ICN(H4Y;JHAV+H*iMSkxjpu*L{AJUC6R52}-?;9$*;1-6gV0eMu^m zXktI_ZB74BF=Zs*7S-!(J_&Dd`b)!e_u6OgT+WV;(9|N!>x?M5YL36Vj6$*VLL&d{ z&T}7nYk`wgwIe~QTwCl`x@)SU)z~yVf~ubZ^@=|4TH$}eHT_pJCpOZVDZ^aYqsTc> zx4)MmlD)&jWu1O@S#ajtT=kryOP)5O3GceE-=6C7M83+HroTbgq`Fv7&Aw=*Y8)A7 zzih=~mC5qZ(qB%f*vJFjg2m#+<~atiTfRn+r8T@UYGlArH3Gv7@bzC!{kjf!=_O{! 
zRS)~~0)nM}eukxt|5ofo0IVW|cp^X{ZJ4;>(j`jUovEB@B}ZAqZ0!bRImH(PXC1(b zMfS9zYT#F!t-)Xk?>!pTt*~*uKv9%4OZrq3b#9bu0k0brJNwR}W7b42%R=+@$E#4~ z@;!;?4v=U3)oKu87Cfj0tnbN;sn7$VjV$a*9lH?q0Wl#YO`2@iII^Lqm}QBaI~k%Z z>Y2Lb*!iigHEPoEdhPVLvs7m%nZ>W+OCfvXY(USaf))m) z<{*bzc^5<5_3&TxYBlxCL9d&!Ud{q1x!OU3R2@n0RpDr=qK9hI+#5ycj(VzvBSnmW zZILO;g#b$Y86YzgmAE1Eh^}Y4w$7$VHNK0Dm{=cl$`~+=)5fa1pAde^{miIOtX*@T ztel_i4MYLz&M3B!kM<)>uu%$2;E2r8%(6$8g`^8)q^-%t+$iYGXQ zM2LY4Z}*y{KRHkgT+lKs$>Cd(s%Dk&)kCc*(H}t>1UN;A4BMk71b2lChLy9{Yiq#* zBY2Z55JlvgQy=fqejYE7t*f8t(EBtQkUj_j62Hm*sc2#$VT%rvn>~42b0BJ}eDhE4 zluc@Q3x@uX2zNjj=wtky$z@$+Wa#3lNx3xV*T5lNf_Qrh;owiI-^PV{q?4&m)3pkJ za$GnvTRNr>1hh-1@DP8(A)HDGv+K=Xu8z}*26?HLTa}vF1Acn$(5gN8w9}fGM~yI6 zadMg-L>@1RH2rYS2@=Y~;YZ{NqtBt*bi$ zq3wZ)^hyxA3q%}yxCUKjC(X4wi53-g0!T|ZUJpK_WLTArwN4jLE93Kot^@iW1^hEn zQBq;|SvlP%O|>f@m7{546{j@{lJB!AdFAZq7#&)amyOIYx~Kb+R-dxbiLH{N$IrJ4 zXLt%EXXa~+_c6$X2}hRGB!w!SCB;TL)v7pq^gF9rj>A^vZ5H-7gR$E77>bL>GqdNk zvCXrX=7okPSfepCFQwX5Si)NkTGy;4)CPtO@d8-!_kh~9PUB3!2Gh}6K!!J+Sv-87 zFRnOf=&ZWeI%*x$Ri>WPeEav&YhR87xqLdpTv9FJ!05py_iW1rFPuhU+K#&utu(D<2df-g>K;(!JSHJ-jWHg@VK6`*=~A{+?JM1d2bm7#3gu&*eUJWLYz|Uu~n7DB(-KeROEVY}#H%5V7it-(py;aQT zv`}l%BdF9Q7lXys<7C)mu1>-ySM4fP5fy2#%T&obtBDG#)Pp-4CC;L) zw8x}bl0>7Ag)qrnU4sTx@M)*A@r$$TnDU-6sA6m=s5fVbf zX^2w2&!oa)X$a*Dt;|WRJO*vws(i`8haimm`Cj8zr#+j5AZb?ziCvUHU+2G%GJLZJ zOOp(3heM;?Bg^WS)xp66NhY8FzRC0dI8&$gu5pyM;A6P&yQ=zfC!if!<(n;C@F-;s z%bFx^>hm~7uJNz9G}a^>l!Dqk-iP#_pHJ(k2<1K+X}AA$bXCCxDV+@pRufTeIkSp1@r?tZTqj$uO`Yh*z+6 zso{hvG-E4BaZiYh1nLjDRp!*TCdTOB#Z1{9?i z;y)xl>D9bDGf^s}KYMkE++(S95BsB|y2Ty`wg2#VtAfg0ZC_-JS*WvYCg)b|Qz?ZM zq(qZrT45|})n?H$wcmqJ^)0|L3ir*wkMOtnBsPC`NO;D7zQ*cn$eC^HHJF}P!&(fX zL~u7QJ%p!PExJ0FDLRa4CNneA|6K*uyKKb_{8Eo>ym<0SiXViw?j>56_G zX1ZOkroW(m$j(`&`Ap4;>9fY=I9$Xm3IpoX#!>=fpttKE9-mHtW;$me)s~=M;@q0m z#vhoN*k(WGdUhF>Y)msm%g9>$FsxdTZ~#5^leG%C791CF|udL~`&LecWGevj4; z0`pm}Pxj@m7UHSLoYu^30nYOnqV(X35=lclEyFnO7FJmRk1-zO(y}f9Qr-WhySj<> z2g1muZ|8bHa*PIpCR#HNtvuqu!($7Ly`qDHZQz(yvT|-t0sM>8@+K7u4qN_gcDy@2 ztS@XpX;rlSL+Z423NvieH{n_u`a`xuQOU${M8zJFQ{A95tWc$Eh;06u%9lDA?Y

0j1XC(j{+wo9R-}AtY^T@FfnCAT=jGD2D#m=yfF%~EgD13*DItl zY%W=IgY|nfOZ(b;qlMA!E*;JvXS2eG7rWLjfYl40MA^J)J&9H)QZ)3@_usppN{Vz{ z2AO{?Xv55!e>}2w)R{G^aHzfD7y-V6pjQXg628P*>oTd_^XQ-0)(APIeX`C^pJ+D3 zXZ(}a6?sK2iH#mJOLip$5KPdK{RD+7(0%s|YrH?VQlYL>irTu4$I?9H$o!LRtxp?C zt3yh*;`WfgCIj_{3m$>bH?Ftj@-CFAv&Zx1+Diu~>YY;j?SCXi5I%6%BV4DBHJD+Go=F}&k+Z3pq7t8Yk6$OQUeh54oWwXS zCO64GF_?X?cKj;ke>}WDZzXG z0^l1?OfN}4WPOMC_4YrvGVi1YkT`p|C%9Q{EG@5<96LHu*v z-SLk=Y@-s5yUZH5@8k)FNrmsKZ5Y(=VE~%kyPKp{{R5~Lii-mJ)aLR7Zo;S+D`Iv` zo+bYG)aZZbh(w9lZrh<#UhUpp5ZA67JCElwzL01`Hme6l@X~nH*4|xKtN8uTN%C<$ zFQYfk8Lu==Ns~c;&@#!rKWk~#`|r)x!dKMD8x{f zw5yt*=t*_^y6@Kb?AzhSb|ar-Dsz8zxM-ZNM}B*asbJ!K#6tQ}Zf;mwkoD>EMsprH|qU#IUQi^xbMR@au``o_PF)c*HWMJ1^@hjWl)xs-R;!PJ~U-- zE;?%8)UPph$;7MwU@S`fW~+6L#_?_uRFttt%Gm&nYt!-^RF4m8Mr z$}KfwLU+=NgXW-rV|CT46Jsoqx$o<0>`GN{98O%}aCpO@*z7CJK9mEi&*wGlP@)5u4x&GfSTK1rB2_m$O5+kT}Wj@|AddAE?N!yCTH8yZ0AuAZhP_U42e7>9br(sG z7Eo`36bz3zj2HfPOtT^umuQ`^O+Xd@KI->$w>I^bjez)dCGphTsh2E>d*qS?5~Fq9 zBfb`gGVn~CD*OnjR5)IK-{C##t%r7rHswQFf`b{XrfX>rXrfmq(g04kEuW?xMYW&5 zc2{8z+iW}6pNB(SF2rf--Zps^Q4T17AAq;5w8lX%r2@TtAssOz zQ)cc{(9MC4143xsJQ94##Vb+(_Stqyn1EP zPAiNu6vm9pD`a-}w@ zE887;zAnk9YyrwbH%UnV%4C^h)9rIc+j63N-oDP!e{I#$x~?{({n+J0fJaYz?hXDObOd&O;p_%8r-%to#ou z`Kye;h>CX|s|8b|+I(Sfy~c-5Tw z-xNVBY$7yzUUDo3>W|il_y?VYRf}SaazYYGpTHt2?i53vv0E>*_eja8wl$FOwj|p+ zmhE!g{i(BsC)}yDw$_U(szNUHq=Bl>rP($<;-&2>&xfxoN3ekIyOjFOiXeH)Ndp*yi$0mSq#>(zp>S)5@rGW zA17y$uj)5(u@F!rt+68OB*1t|_^3ZcD4ZUW5WTH{ZmhBzqc? z(^B?Y=_+RfRkCDT5|ETQ&3YpWlPwcgPfq#v-|+u`>G$_nGOm4CX^L+m8Q1>LJF-ht zCl>_VL=jCvP~%)YS9g`%7T)K}K)o@oL^DG+7Zk&moK=zS`=Dg(Uq=S5G+LBeB;Rjxp0$*(plALto#8ZLMw!hgze+`KwvZ{Z427K4{zf^S;I^?y2Wwl}qJ12xgm z=qoE%weXqb8v9~)j1o;colv!2NZ-xN-taCu)7ly=p{2EuePqHUeZney`Wc)qFl;7i;bJovk`AebJ8Q(nVq zp0qekckMnUIl0^9?CPvJE#AmP-HMry&W7sxlB)Se9>h8f;)Uf1nK__*z&ha|nrCE5 zumvVy78Arb!#+i+Nl&h*0^%D}Mdp4Zivc~;V(psRs@|M6t*oBjYZ)GPbHv=vHkOH1 zS8^TeyRbiK&8I%o6Rh-IFsY#t0Z;BawTcars#SU;QZwn}Zr6*P|AJ7ViHAb#ll|_B zyEFx(OJY?`M}zw`-6fnkZmc;V5FrPzCB~&nw?c)H`E^e&n7Bf~bXYxx{!wpxpaN%$ zuyOe3`>9sVK)ysTrw2S4e|4Cb|BUKwMPUrU`D_XWZ5)Im6Ika{4a z=*Q}gQPo0|5~R|zzt^>=NrU++jjl0n#(cVh0g1hLV`c^w4uV;=`sO)&Z2s)B z1VmCty|ycTjQTHmel;r&mu9~e;V(Qr%pX3vP(E!OZ4axx2xK>Z*T$-9a1fyF#$Nnx z+t$fw8XU-@tX47x2G}_VTP7rAe6M2zd)|oKg%HEQMe@e;9wl@m?j`4W_6a@;97`*6 zxV7M#kDA~`9~W0QTSXLAzah*f?7R3~{ie#JWk)#bdHAP9aOj4}k>_vi4MnjV9KX8io3chy;7kg8F+*#LRz`!Hr_-21d5SGV=NlN#Q%K~Po7g_PYe^8nFeFT+ zl*kDgrrt7I^-=yE>EcZ~;mnYwLPsg70r);Bg0Jo=Lb*~iP6I4!_y4G6J~VgHk=SNd zx;#F4l9Ry_B00axII}`U{(n*TmQigsU%Y52HQE9N3KTC|q(Ff}kOD34?q1v}UL?@B zw79#c#VxoLZ}AY^f(42f*C0uN^St-2`|bSC`Jemct~(!+weqZIW@l#4-ZNW%+w$jo z84`g46S4PX6%j;2=&F5Q?s+ z6X;yU(qF?_5IE=aK#9|XKv5JvmJJl zv}TJNs*ur%MQju1-jr7k9z=4mMLNfs`p|Xy)kgq-JFIB(RgDZxr+jbH;$J6s!9y@_JCM+wRz zljlQ=<0_T8#m%iMxl;aKMrtdF^b>FBr$@M%*w4nG=q5udQy&GE1!ke*;h+EaG>TGD zH&|LK3E3Jo<8$y4xp~P{M9Tzhv*%kc;W7nJQ+HY{-GcYg3cc5z^$!9;_{N>QORoY` ze1mmQ4vsfnTesT5((B5!6o8>YJgj1a)wuz(+V?&Q?i5&1=ZJ2R*doPMyI;yi^^Dv9 zL&C?uxXG6)Y<3HJ`|beKkuc+Zkl3kno_Y~pR&7`BLrz`csH-&Rq`rJQG3FgzR>Wk) zX%gSe@iF0ckZ1fjFKH}HnZ5n8819s2~z7)|RFYIC#@5|@!W$S_BVQZ`srix(4 ztmzZWEa6e0xI^DNe+ys;T08omBa*GrYuG=G#4$Hq(Z{G>f6161jriBaG=)|ND%3#H zk>Q=ViswyxPNChpy1PIT8{>YP$eA@ko=5v`;X1Da*PDfuG_wAHK+O6Y=+GbQ)VUok zuKGzs-_LD#r|0if6b2FcZE^0Y{uV#q;H#Sf5-2lzV60eL(Gd?F0_qR08l|Uc=ZNt~P42GZkA8y4Zg7HnG?3Zrj5kKfidg_pL--tm3|rfP{4 z+cDX2j1WtyDU(!449nbZ_loK-5g7D=dL*bkf4=;*Q%L0&w6`=?a!sg>8=-OIe!b@J zyd!qa6grDkb`|1&{%MoBxCO=)C~{6F*ALsB(PI+!)Z*$Qdqp98@WeR~KgHaknx+;s zjX<#4ow&yDmV$G+8tEj6lk|{h&_Y?I+BI@$LwgbGL|;9&2%SG<7ITauA%A=esxKM6 zEAcw5N#Z2=El>_gX>@~<16QQ 
zo45?1Tn4-;RHQ%-kT-dXv+Yo*sYZ1DpE%D>5pC9K*BZ;7ixtiZg>oR1@P04ih0&2h z8h;aT{kv=Smw2>*Dcio8jkHh6C&W|jK*556U0-S2vLoBW)JbnD{r<7eJy;9v*E)MO z=|y83B-3OrD=#=|PPyml`nFsuF7Hu_OFYXaHVF@ISaNtycw|;!LQHy4dW<St$diZ!m5QQ6 zKoL#~HxHMT#erEtW@d~|Z@v3AEks>kC9&|Gl+hnhsQ<+GKW(*e@4xMB^bm7*+|AiJ z+Bb7UzNL3Nmi&v6*w38m!}Oz7-_?sks(qmHjp(+}vaIzlKhsq&^3_|_phM(8gvPy= zXNRxrQua1ck?{NL;P|hLnHf!oC3kbgO23KnwGLP-4(R*^=%|PibB09kFQ=y1BU&E- zKg7mzW*<-fae|VmOJ3WvO9eIOIQI5|`asJ`=pCCyJ>IH?j`UJ`CESep$159U^(wHk z$w*z*O>hKlq>3VpuR3lai+xq_|Im1VGk=@)`K|J1de&yz;l-Dng2TPaj1Fqtd3-Tj zI_2XG%8qA)$A-yK#UnJVyvu91h7C!W6+X>~HVD9LY zqZ=R%v~H`Y7v+u&kHZL3SEX3uST&oaGtI1Op}d~asKKuc7Tnljsu~<`FZGrYS)l;F z^y6C0h3LC|Z$x`$;cYBJ5eHdT|Nf z+%VZ*Wl!m%rurs6U8mVXBg1T;^nThnsJ2Ig|_GEpbHR@p@vKPVSQkr@URV)Ld5~3np>_%7}YFDv!8|y!OH~1 zLw%U_i?z&)uqo0{msSRh?$>we)=}+zQ28=3^i<1n>AD7emVNVKwlViF&8f~BJ|8!*VprmQ5WsGn4IPY>3?Qh#+%LIW!r_WEI z4hqUc){u;+)e3KHjpd)*T2^2rP-U*%r}e#m-7y>!z1X%fl)BSQC3|+;Pp4z!y;fDf ze4wRuBbZQ9{G8OcmVUHEhC;1zl3Hp>LgE^Fq2OUd^;IH;P^u4tnQE(g7jJ+xsu<1t z<2D;?$+>?U;3%LPh#UlJm)*{c`L-B_1YRwFIa%gA`?iHdfpMKI6ZQ|?_7C0M<-EEd zGgNeH6LQAE-iqiZ7rhdm<5##T@E)8PIB*LlgDB;ciF7Ve>%d86$GFo>1@lXz#|o%I zF_gQaPSO!cY%(evrQ6RSGPo1{9g!(kE{1BSV;`0TE8&+Yl5&S=c#tZaE14yjL)R&; z(vPZiHv@f7U#L-}wbOI+~VQxrcim}>$vU3 zGI_^yO_miAFgK=!bbX(8Wp<(aMXK3=t3hhmNXvjpgYip?0wHj)a(}uqNW$L#E#`?3 z-+BT#$5nXegwAp=y*fR6^`mh)Ef|iHq2Y32?FH_uuU(wKXdC21eeErx2==vk&_WZQxAgXy+K<~Z&Z$?K? z;!Qlk%cE6bs7f5va06lB_2m3v+H& zws6G{FuH$!kQOgh)e5cnD|M7D&F@z;3T?0LWi0#`HjpZUNfnw@#v3$uN0|9FzS3}x z^1YHskS~%$nwc?;LVX23el&isTy(W(D0L~6AE&jO>YGFiL`TQ{oM7wf=IJft+NUfx z?JiG$7L8&0RQ2qxtGE zBZXHEfo7K^N>G|Y)2To#3z$T=^J5Si@|*W&IY-$Nc>ov&HQS*lRgi>iu+Ok~lng5#8+D1mY@7d^$a? z-_8bQE+W>o#G8W{vc?ZxH_UxsB9ak~=uHq_t>$*A~ z2BGlz__`{jA+(1Y;ayy18Rw2I*o?}Il3K$_JuXfD`1vASJ<^OVkRh!ND`0Z}pYMkG zmfjrPEtS4#pA!o6>=unML|Ex@SeB6|B%z>fDLgB&dC{2lOiv~Hyvv(flvx)mc zz!Kt>0{Z7FDUYVE3|F%fHh{5oP2H+zV*T&+sU2~=y-5qviG^r)@#?f`^aef@N7*dO zBulq;Ytr_qQPto-lPhGG=&Uw*Rt}t_IjX$fV?|O+z)cT;wdc?FThGf2T}J~auKnD* zFI|^Prmm-S<#dIy6NR^v@+MbpPVmSBkGuP=?yMKI7jD$m)zOcAU%C)a&!K%eHD8nU zyOO;pd;o~HsB|JlVsrh=mR)_(7H1W$=>yb;n zq$K$qt6k_+@#Q&QmaHYt#MJ>%$wABY;~BpX-s}&6DgF*fR!7E8ed*m}G%V%bZo6G> z+~r#OD%>j`DP{wB5`k-b%X>)o`fEI~Gl9s|Exut>Gl3W!;m6JW{h@|GUHxWHK3eWx z>)Df?D*1vfaxdge)faX0vF*wE(_!)@u3_pu0-()NYypuB8l^slLZn%I$mCDXydA^d zmL?DGyCvd4^kJ_1{1gfVYHW%e>R5hTxp?#J^7tNc zCt`hc<)3tM_r3#r?K)vK_z3X5k{}jBu~~WY{D-mm%UaO!>a02OMq!sU4a(4Ac0mf&90%$=u zw8DD9t-N}U%S6NA)iM2h%a`shQmfS)Ue)0_i(hHwObG|H|6tWTKOI!_vxHqgF;Hnl z)Yn;#jN4l#z~pOh&+IewXeSV*?lQd>(;H%$!!Bo0pFH1M63P!YQVo~NZ9ij?4T$}T zq%w>_u&lIy`)lsBoNFXK@${^3LCt6?l@RWvpdgv2#QMDG2TQ79*Wd-Ids-oJNTUs6 z4}jrf|5^Xp$K8LNrUtWW>_shW%3agK7>QaFK2TON}1DN?<-Q$Nu z7_}E}_eWWpASCl1DiGtu9_AF|E;1VMM-zYA{sOxffJXI8_8$ENap!Fy4eXnLhyL%n z&LOqM9}k)iqi!&``FZSs76frL2>6nQ{h{c?7cYlyOE_`@G@Gp4f#fuOSn)pO9u@Bu zKg{C%DeAvaM6t0|7?_)3WoQ%VcK)Qs$;F+#X^-7%1H*fr{Is6x#$v2a$7p+fdoxpq zR3l4Mhx4n{@TB7_=%5Y3m9m>`CQh!o5dgqDigcm$_dps?{d(NwBYj-B-#-?^;6w1+oi6;2-j*eQDk0~htw?yM0DvM{c!S0i8BLk6FU|84uauFp{)H>7CITptTj3LvRc&9h2 zXgavyawQQw3K}JcM1EG8oj$fjQo&XB;6Ie}|8C18?@zglXoxuv_3*#G`T=NL`MceJ zSHY27Nx8+}gkN8@#`{OrD*fB+`#?bxxg>w)@peB$F|hCopQq zpte-VOE_0IG3}-0;;~bVoVUM~;lG(DMkCzfXJ2wn&c~ED!9gU_1DcuI*qNsJEQW3Z#d*-$R=(7hs-oLL z$CqwWv&(IXiAZ&_Kg`&%=nhv0&_$)?)5;;^y2Ge2n3I1B|xy6+`Oi3Q{9#^1av|Jz`b`uwVV|#vSTbo> zmS--zk=sB>3_Rx!fHWOE56L|gc7kOo=4LBO+1US>?7H}VGV-j=!O4sf?*@fTlXX?E zw0c-hdT%H%;TgB@u2E6{0XN_eL?W*8=8&VSL;9594Bzm{#jLk_QQH;1{<3BfjA%Z) zc}i0#>}({BoOWb^dY1}(Wb^=VTVDf(N2%Vel_mRs{tw5gufGH}@liHpjXj^z{`A{2 zxf|rn*3s1>YF*@=?ZX*K^1wj~(z2*X^$* 
zVE?b#4LtyUcZNELrsNDr_-uW78&m%l5cRNi1R;MPKDlL^d%aS3&s>R-stGqH^@k&nc5noShf+7 zz)Hjr4RDR0UY@2OBt(165Cw<`%9KvKWZ}nVmtHvwc0ry0x0Z@Z8Q$niIdWzsGiVD< zM03mbgUWmr)z8#xGm#+s097xY*ER8v%`MCem}{M+8132|0=MFs9J|?ry0lq7==|fG z^#9RYfb0JFE7PtctAA|b>t87li9-X-WXH@`vxY37rgZy^;dezHH0)XukFYG>>zOq{ zYBINH$iDn|{x5g?-yQf|UlxJZJJr(Q@q(*Rk-grU85_(~TX^jG{G72SPC&-p{YB7A z9~<-Q1W$rOqv*Ur`pg37l1TiE^|!R&kcy@l`-riklWWVV`wqp~$EB@Q%Gaj4zneGt zHBGqZxISr-{fwl4!dV-6nI^ay`cpRsjk39LLk=VEuSty|q@;z=ceNq^@01k0=IWuMaR zt}G+UCr!8=>$~tN@guKU@c(4K=TGml`6L~^-wnf-@WqH3>_SLHUn3|h_;aRhgsV@j zestxzWeuj?ob9_Vr+G`w?RK+9T|Ua-QS{8GUK?0A?ZSZ!+`llWt-Ju?ikBJ9F|H)_-y>Z=JMOSz<8l>aXAp9b0(5hU2S(ij_3S$or$dc zwG~cf@<791hAEEw^C!~x+!G%RR{pWQf31#&eH`_+Q;~^0_N9sqK6~Wl4EBJa!2KF+*+G7L;6)*H1M~S&z)#41lhwIvn-f;ssFZDqD{GrUvN26q-aly-~kXi;mx5w z!R7ul{-#=bdDym;v#hL8s9~;Dss#7-FEewJ?d>-<=@q`dAif`ZrjP!R^KnAz+VI_P zapW8sJ$-YW1}8toTU~rT*8%d+?pp&_>Rz;EJm-@k-F(lT@|x~sSHmQuvO(zxoC%#Ka`W9z{TO*A zQqg;Y`-7n)_tBoCs?~`7`1KmgDt<*h!XKa6#~bP?Qy%CSMtQzPKjZw9Wpilu`o61z zdfuHFvmL#cVne5R_1RFC#O||C|LvXgNX+krf2W^*N2M)QTG4)kh} z<)u%OswHn9+s@<356EUEDuA$sj$+S#9oy{pG9jTHuyidID8!aAt{#J+Y=imYg*W-Q z18Pn!k-6YP0kv~e^<96LOVXRatxf;UeC6~+bRN%-9D2=ir(XDB$-us%bL-G1F6M`n zlqC1JuS-0Jb%l1rNzwF4209oWr{xpCQwH)2r0dR)pZ~>a5lQ=*kO^+ur{}160;gVX zP+mB44Scj68DY~FNWEIHxr(4Avn35o404L|niSQvn*$~>21SKFmFQA;cBbuVdS~6e zdKv1Nt=h~#8_s$6U)|Hb}d#YxJgJb5yM9t*=F4f8^Sw256&m=#Cc2t<1T`1kE)|jj_ zk6Ic>g#ufe-Kek+J<B3^O&!~IY*bbF;M>y&!aM*j>=Pio|7D^<2Io8+^;r&%kLNgKmXgX{>L2} zpjza_)%gTG)yhMMA}XqC?VvFs&{uA3OlTx)0<5>YQ3qlA3HxdH=0dHZZuwH2kt4Ln z7@n3#g5c19ayBX@VJiL`6as5i5h3Jzt9KuN zmv%h^?vVG_@{2fAoEfY1zoon}-OsrJ?x8LiuK0g_0My=&-7WtwWs3i=y{`_6Yist6 zLJ|T52@(P%xC9RnJPGc>-Q9yb#DL(zoxzFmezE^ejA0xH*TB~=j-MxGD>i+eAzg@nL)#(PVlk!uw#Q6`ZdzWj@6ML73 z2e-DS9uCi-$M=728m6c_#2~un+9fsgbE94WALR>tO{4yabHwbdXsDBF6Qw30h%L}r zonEfd;?H%!3G*!)r9zIV=X)$^Zt-926s1R;wX?+zE88sg;@SO zE&uuduTw&d{Y*o8hf5dt^=cRU#7!|PElHy1=|j4&&Uhs(<<6B6F+WB@#(ec|KEcbB zRL28Nr(xJn|4GQd@+)2tx5(9^6{tHd_Ao7K`Y`34!-1;LH`JvSYyr)j8i)vK`_`ZK zbibI?r5-6{m31DEhl_ds>K|6b)@6SS$6483pl#zVEDoNp{S$f9n$}?I;G!h;>$3!+ z6riC{NOwX3nltvMZY*(?9v96YMwaeOE-6%rrJ zY0`iot5wliUGA@;|KNx$skB+nP)QL6&KIL$y8R!Yc-re)iO(IT#xyu>w=WDI%$GL3 zPCYQaTjpimM`k%vJbK3BQ%?0>!^D6F5*OQCa`d1!MIcO*{kx|P8P@X?rQE+VasRmX zuhUK+;$v(z$}cRhKCNqV_@#Mwyh%w~Tk*`$k{dW&9*AOJurV%*vi4>=V%ECHo(xls zNuPW9@k7V2f0g3@bN!Q$_&#?pAP&fFX+8C4=u57)*F+`b`{4SNP4rr2wV?6U)ggu4 zsU`H)rsBxNR(-|-bt1F$L-V;0HyD+B{}>d8!x7xN;KO?UaVzhh{|0x_ltg)+;Fg-_ zM%ZESJfI68-tPcLo@_2*C;sF_+VsTDn~QagrYg)M5@| zmV?E=jhN#=s^tY?Xq!1y)Cjt~&U`P3`A`Fr_O&=`>z)uL zv>a%gWqb699W$hoO7ck>5kpDM?cuy7a)PV8dFP){_D@dTp_{_;M|z@{M!QEiW@jP9 zN)EI(ZChvdL{ubvM7p`fN z1VDEH>X+A2=8sx_gN%2F({P^A8Xr__J*I4}KuFOC6uae+*^1Vod$Wk8U9uBekO(&o zu+;IGG?udf>iRTlT7A>vR(elf{15l&zkd96uQ&OAfw>uRTn%LRA!>0bc=@@~$pP=@ z;c_c$5w~BmRQn#Oqy_@5vHo95zorju;lY%QpSSRu{Ye4CIi)wM1$1|E;^9BQTj(<& zeAcU4)(6CHp6O1@C<~wT@Hp}7KLO|F;)(RIPNLHHKfH~u{%VEKR~KV-`{k$qy0W|V zO;Q-EDS*qk-o6fX@!J(`p4Hzqm1`LA6!3TR3Gx zsP(!8zFN2NeR0Ee_OBm*l`D{(WQ~-n%-{9uOVyi?R&-jvzZdy3uf)yEQt{L2Iv8U( zF)%edk-GF2o+i5Gu$arOCG4H+pZ_|&|7fApSiO#J*E+<^v?uYx?qzDZ9`Z=;v^uS` zJ~8cWhZ`?cPYG!=ac(FRQOPq6TRblKH87+j>sz29rJXTvXJZ^>A4ZCDuP2lNN!?Q|J<|1Q!`qDECP^9s?h5$m8orm3Wvpb1P@B^9v=0 z?LGhod=KB|><2V?U4Dc!m0+F?XW3Lb;%mW5Unz|*4NTdUmN-`zT2?}33z-s@v-eG2 ztjNSpV+)Yr?Op!rbUiP;#QA53zf%y353G6=TXI^QX=IeZ zn-dOFsy&vC-NUacSlEX(Y49F;kBJ|Y_{*zlUf{sI*s279xp0?_`6+gi_|0Ofu9p%P zX<%r{S1fsUFEmD?Rt@6e4BRkIhFTXNvph1!v;Q-*^q0xA;MKwB6SIrXkoN$Z&6~)} zfi8gh8TLkgx3fq5Y*x|?e$knAp`LJ#1hns+n|6Z6@qU{B8G>=!x>|-la^jLMS1d6lZ}-WNm+LOkAt~ ztB~w{ToCi0I^fv;$I3-P?KkhWN{7vunOposnLBtrZGuF@HXWMc`3piu$jfc0k0mm? 
zKuLCZ{kYxUfbgF${?u5vH~`v6QJb^$I57OwPVCiU;r)R&j8x^k;?JR%6^ZkcaDuOa ztKewTgh<&=Fgy7YkiJQAzM>%YVG@h69rd8$&p$UTuGJ$$#2>ffc_Pq5AR)<>xy8iY zP)l^>$(X$}^>F}eMV=O*-I<0dr(;FzyAtx?dyR~(Z6EIj%!tn(Q6QqnB+E3_rTFRh zAGZ)Wzca2MFXQ6q@GV1opWvU`1+j!m3*$&aXCzdQJw2Y){Ye@+&Tqi;k^pFKxKT&4`r(z=AQ2Sg&=ekE^* zRPx8Uj(Z~Fcj|%jse2%z?fF0-7#BX>iH> zWuK;S08NNYV_2Z)sqmYdTIY&tJYoH}2r0cYSm(u~*RtVbPH|r^C9CIOf5$iDk@841 z+thU|?714N9nN??`SOkiIz()XYZ)5v2eEgmX}Ktni&mel5o}t5mzl*Xu){Tv%Y6IctbkQ6oM5S$f#Dso5E1fi|LPm72Cg}?aeeXskwMV7x>LTYl z-QN_lTFF(Fb~>A3lw_{C#H_<5s5Z*=F5B3( zZpO~I(u}Z3n;o#jk%kb*M(JJ|x^W7JAieWls4^&iG91L96h+s&|^>Ff5Noizt-WgU<~Xe+oSb9!{^8007@uy zfUvD6PciUm>lX_;mAH+)Zi6+S+aam3E(sxSq|HH{T_*-M{NDWLJwbe{sIVYsN_(=X z2Dj}NU%ql%g#NY4q8R1Aw9|+YpZXYZZcezZ=bDi+QvIg(!YuuwX6uHvM}8%oqQg({ zzzGykt+oI;Je$4rHv#A@RyZ%?CM@>MIaBN0Ky);IZ!os8$INOc-f3j)&@Zy*eg|H%y$hc(nzUf)g@Zz_(05ot(Dhe*4mmq2>7bq-Tv} zXI<4P_WiQ>F+!?GV`ozNf{dL2drMkU`M1OBRJgFVl#Is!Pe__88&&jUa@C#_P=V_* z`%}kQT^9eE1uUh@LIw2dDqvF`SxHuwR?{NzWUl{hpzHkaleMhDj-M4S+(TP#vz$u| z*-C__z9()0IJ&iyGE?hFS$8r)b{Z?lOBFRyqqSz&O2AeyfNV~aJfC@g(HCY!nH<|I z%;kL&XC_ot>7at5htz1?v<|1y5E2_y2k$(!v#RYpbM_7{2o*bVE93edJPh=t+T^t` zr9QChx)Gv4Ko3l#uBr4qSk;uRX->r4_sp87@}7+cBm_gOEKH#!$4SQPN4q_Q9-Sk9 z+>$tud}jKyCrh}iCfcNL8V1p_Iy_r6;Xkv`ZY<16yF^Uz(UW3j#YuKS%eGxE&8@d3 zQyr&XU6otb4uyWcI^+nPKD(EP4!N@)&yYFUREnk73aLd^#BQVx)#!y_5t-PXGyQD> zLX&Lx4A^BjK@@{4m%W^i%fHvj@32Bz*XNUo+@12Er1mt!+i48K<-t5#KCaY|pR;pO z51KA}ggXGiYr0<~EVP6;3y(u_MQqct+aV zFC8FGXy?5G?#}8QGLH*6I9uAdu1v?`9E~en(`;8mQ@MCw=krK>=VGb?t`-?_;6liY zHXna#$keHn&CwW&78a(Xo1~R-ZGbd9+Yt;aM#9RekI{M{wgdI{$$A3LgX}y|OyG61 z7UE0VZKhXkzA!x^RrT<_0MxE{V=AE9Ugjip30{)Br264=RF$41AfOHep3AuOdd{dV z8OVQ;XtWYI^U?dUXF9Jh;d#X`0F7ItN*aZI=tKm>yI3c#$LK`b)7cDH88a1NTA$W4 zk0m`$R-by>AUW0&X+-T7zYWyxB0I%nT<9PSGQ@=BhawjA(mHl`43w+IEF}^2xckOG z*nf>TbL!0=4?<c&FQOfe^)Qdb@p0f$FI7`=Ue}*A#0-E*!7tH9Ci@&~kPcHU5+QkwsoTEo(@Q zO;fz7-tnM}YAGdLbcsJSfg{x4nE+m#*sm*-k$Aa33|_2ix#%8Xyn>kq-Ma>i{^%$< zc@R2+BosGvUw-%gL`uZtY-jcyn7ddcfx-!y%Q8#gB2%9S9MH&Ppp-ietnYPk>&7%J!Ams*TaP1}KIm1d za6CDoZ_h$Z%#~b-?pYGvgz{e`YatGZ)`!;z2F*ig4`hFa2-}=8VJPj@Co)WGXiXwJ zMXxoNXvk0#C-VGbFKDg^YwBuL3vS+xtQ7_#Na|+JJEE-5*?B+oi8bM*%rs_a$aHYA zt^F)sL3N&8Pf;}%CuM?|%z9LF#X7SnE&>U-@HM~i*l##F!0i5;dSUm&n~lNpY3+fB zM)Ts#!@1IeGIBrmXbmpgD}U8!h;eY#o%Ijo1y#>JA{{V^%DkMdX_~C!ys8pxd`F+E zftaV?l-N=jP-T1~34623ZT5>$1{kp(Y;~zzq1QNSno@DSQ(KdZ(g~~5Vvke@Izw$> zgJ#)IkET2C67wRY!ggh#N%SlaVqv+sN%L@%e|J(Nb$B*Mi*NL!b@=@rF;1t0gOaWz z&1z>{Ij_sT7kosO_WVRj5+c6EDxPXe zWo8n1VclxcDM_gpycEDwlILKgl!q)lt9!LE*EtH;;0y&^cFqXw9Pn>(TxR3lUcr!= zO0~I-E$>}fKCCD^nb{BzJ0rcyr-nvj4dqzZPFc5O z{byc|^n`5POj{f6s1zM??X8D)h-^Wq={s*uT55P^=ZUAT+~Ue98AlUxfi%Wo1; z0tqg;!)R7$*CbLsIj*iL6hhIrZd5AqQqYEIVhy&0#%jZ@HxW-Q1Nz_F)Ic3`;^pjEkK!3Mq}o?q z$<-0t(&Q~d{L~knbe>6S%EFI@;bz3CswK3ZD`4LxcwxXUehRpP$Wbbg4(1XaA8BHY zz6r%;Uuh52^!8+P#yr0luu4p7Dk<)6fYIaT+xp|y1cYVyb4=Beb*D~~YtQBMbMY^U zbyXqij(b_q2bk25y5#d}(sx#uSQSsosat$aa(=H59A<-QL}uIXLYG62pT=HusM+G+ zi*T}LmK|Kfd2$IB2?aYCxi%@f9s6-q-jgVoE0L$ai|io(u={+48$$oR@C{zoyAF;6 zcrXmIqEYE9)rzG`YSV zRyQIIy&L+uLWt{WTj})kcdv!pcb@0)gl*?4ex=8Ij3L39y!VG3fA{LPUYHt~sf-@2 zwnFw%lQ-yhR@I$lD$^mSo_0o-X$;g4Gh^AL^$8v5Twi5!5Z6>QvY>aOaE0m*pc zJ2g0g6Qj(tZA~j|2|S?;oQu^E@B7EQ!BOgs<0~dvpGrU@Tb65`n%i-iJTJu*y5x z6>o*KQN_%ToG%7Kk%89({Hhc~$}wBx_pbIs1s93ekcIg?kXfu-M)C2e_erPff*>=g zosCX4SXblZLQ`F^p~psac}P*-00A5+_VHfKtLbC#SEqjSxM{6t2*ct3F3;3QogHds zgEHKO4Hw!P9@RLL+mNOIhGp=T zA3O^#Z>sJ-@IX>DH`o<4xW*PWBgFWq@I~~iHf__NUI~V@BIUWpQona5I3KK9^OrFc z$cTnL#&prC`ArR})z>-S=**WHASRabdaSkf>h?Kkhz%fq@=$q8j8@tZt(-hmIL5|N6 z3@+xM4Cx{*Y(xcBZhw!Y@K9t<(J$K&VDSvso|j4lM02SYU10K@g|Ay)R$saR2YoB8 
z%^PwMQ>E;?(Uqg29!l=`=)x!ALBoTCeX4ac0x2}K!c^)HDl$fB)u30ILHdqYFk9CV1uJ7jlZhK|w6&qlN_L}bR`T`elAwlp zr(cFplDkIIEB=;r=m03m+D?_0*E2PKfn}%Bm@g>Ww<>LzBMq52R8nDk!al5J_2#F~ zEx}pjaQ9e#0bx#;A3C zc7}$_yg{+?d!#CSQadFi%c|W+g)N_^{CDK8r`3I&^^n#f6SVw0K6*4uemciM3dG@XB~Je$pwgyy724gO7+`YZcZQyN^W`J zeqF;u*wHe8g#2;qO}}Tn*?7jX${M!$B)LCW0GHGm|AA8aXUIGMsUh9i!11dtX6KaI zgEPdn>&nb<)owmdw1gx79a9YkS#gu@5?eVR-4+HUMmmdSpmhIJ)~A10oyF^>9tzTv z4!w+BFk5|mP7GU4E;kKoYFax|op=t;Yay`wm{L(ZuHl|Yr7JR^tE#=^CeeX%cs?rR z)8$t4OtEiC{dKm|gof%CnRSv@Jzwr6hdC~tcUD5HHIxWq+z3#bn}o=&@jF}$EJtmMsN+p&Wn|eYN;>sLuyRq#ppstQP(lIs&JFtSQu_f4 z%)ETF34$oIRh>A!4jsm)?+K|>9=jCmn6+=8dT$R#+BKqGN?%yc3sq@gp1eYIIM9L2 zLafH)Jh~DeKRRF~``rGKsKZcGh=@bY(dNj0(C_j1*cEKK^;2T=i;aoe@Nl0 zZLB7LgrsK-c`?lM>eFlO4VeF-MxSvt zP6l<QQIo>rSd;;@78YWzi5c57+FqL%kwFIo%~i%?O*|eDb!exmgMe?E z1zPbz`NKvb7J;nW=wKxoxgjs()pw6Ak9!;y^^X8gb zhHE9({KQKdSbX<~muw@=9pgNtC)LsGE_*TNbA>*1;WqD5^7+1|o6g{1}9x7_2+u&IJ}0Fn_DBoBhx~)_fuK-sD(n zt)(Xrm2rXnk#|aO;F%_M)TTPmL11!QmN#6>>^tBQ!eQcazi%F?xpi`-Y+vH8?Es9Qnrzta|DnJ#OIQ_b zb0*01XsAKJIx6cu%V(t}{$J(4n)`Y0bhmx><66HtU;J$%>2&taqelB(S7Q~=xN&iv z<}5tW#r>-+W1%H?uP?!yZnK>_s7<|kCaI)0S|M5|2WL+bPCcGj!}JP2^IL<{$VHYp zQK%+}b)@)}zIDmL^3a2Xd@zbxwG>-|y-*qwVgdPD(f8C|3Njww z66+C+9v0*(?E^0Mgl3lPx<5D^g|s% zJIG9@mP+;3hVa(Tq*PG%oKSm>p+S1JSt!e@g!Q-u*>hXw&oQ#)Ag;}_Uqwa&(_wQE99v9UL+xj@Z>*YJwmnhQJ)LMYy&%XqEj zoH08z8gDBAcuP)|wkq8BbQ?eOk6Q+(dI?JwW!3l8Pk3}@n%b$`m91Ji?mIT*d|{SQ zKoa&Q&Ds}5^?&S}6j3B4k0hhI?KXvE6^WOWS`BjDRVF&zG$FwMzQIrxglRxd`aUAg zGSqHSQH0Eqk)&piiZ$yS0~_khKwe8QO1?*l)(p%FoKA0VxW)ZPv8nieC>ZR}$H>{V z7wEijS2%~KX3bJaHV(`c8QzOfnre{M9B{CD(UPLN)hf$MkhfwPWVs@3_mZJ4B1Gy6 zupVDlaUBS5sywLi8<4w)5KY~w*ZV`ZnxLu7 zg8Rx(BX5GoO1IRXj-pk0UjJSO>#0dH?AqN-)ZbYOvhi!2pyN16otT+tG{>l>&@?qx zF7>(ByJ2laH{9JnCf0Q8nbA@Q_;kQyXYA2!q)z2&s*9}}lNH&}Ej(P3g7&|wj{Fb0 z?w+?eFRgjci@l@dGGVB+p_1_9WyZsXG z#Ved6Z!fNCcG7=~JXtCr+EK|7KOM#`a0spm-EfVpQ|+n&XM_&{fQEYTf$fK$vGm6P^9rhnJJj~4qYP5U>M+b|9leu%KIN7OZDR;z&3~&LY zg?)Dq!Zo{p@pZ;a3FJy#W%UWjXEr$p&?ro7{Yc-6&C3$SyZEdMu@e6=G%s$iOUx=zw2&d0;R7&x3G4HICvt_ z4hpG>c>B;)b9(0+<}69VrGvUM)2i6zdBX!DP~011a+5#d^cwb*i+Ssrr=)5ajFAlN&? 
zcI!Z5ZiBMv!(l<3!B9ULU7*FLJ(~TCgT}$vZCzDt*k_F6a=_$cN7nkYjXUc396?E- ztmW$9hnAafwsrOiF~V6g28MdkPRWJIiR+f#rjIwQoqvrgZ16XIq#g+Iw?kv)dVee0 zZfv9Ps!Vq1s8(yw8R9~3%$k>kQxmEf);DTxvtE>uHz6CU zw;{|IiD&Br`D;bAhNoKkCRum;GabuPmPXhc+beE|Go6v|alV1G^E(eq0G}&}#UunL zYcQy;E`57@V6lZM^Vv#VlIX^wLJm5eQNSOS#z-AsDHf;eDWJ)&@`hZBO9utmCU)T& z&Ngk%iVT8f@8m}%D`%$&J0pW-GE^6MaY$-l@hJZhxTHn=+ELsIhRRKW0G6c}on`f@T)^QHC*e!$ zGSV5n*)eLV5ywFrYl>sx^7f3M1`*z;Lg0g^y{{FwmqH~PSgfA!#%XOV(d%e{9xl~1 zZu}ZGoXNZ>?0Zo0WKTP`s%q!x3)6}=6~EEO_{X(hP1@|N`B==@Su9JdaE;hzOV;C7+ce0gc5>d3k2cOZy>$*0|7gH&jVl5>+I(C?v~6d8cK74`DgT;aV0CtrWaJ&vTEOwBqeVG zq~d*+C?8ZLd><*cT~&F>B%fi%%4=-1(OukFDV`;d!H|(<4lQBK%jtuY$7$4!_MW~{ zUxQcZF=nTJfY62T=T>~^^J?(&+_0s#@u_;8RPcoFIXpB(8Mi1In9&3-Jw=dVm1Zj+gSd*7> zYBsznoYt?!3elR?^H#|elmujS^>q3%$ zfwl9eySV)+pfC$OW`2~rxk=lQoe!SH(qj|pMEg# z7_fo7Cm2h1)_2N`tuqLHz__{wOLOWcV9vMz&k?oFr4V88g_Ap>hzAa_)i7kr51#Ur z%C@C!Kd^*<%c3pGNM0bub?~6qWFF5kGDIa|nB;CAnbn|K1gc4P;au3|H|7KQ*J#rk z4DU#0f;+<@YIYs|`*IhnUhTBS%hw>L4WWAAo6;HZPElQP0Le zLz~xx-&z&K*BYH0IV;8mMCYCE`P7v;L2Z0p;Oo@@8(7l}m_4I}T7H`; z2c`7mx+pVa|0hUMJ$LW@1BA=Ip2S!^7{pa^3Ju(@{>=H5>`k|19kFv>Fh5J{O!uT( zZ6)XhQZ{I~{lhD>=&L85$eZ!`JB|bE@FobLzuTsA59YHG8_OdCJns9F@Wd_l{a1uw zM*21+HnGr5qdb{U6i4Ka6r=RYv}8R!ak>R0$HFWtzHrlBkQ;>bJpODRjQKAZXOZMK~Arv zh(l@Fq2!lRl%5C4_Q3qG_Q&DRgzHo z*27<{et%l^{q+rr1tgS94Ru`Gb4v~CswM=AW*oMuTD*;z@@u`xT#LTy;mZnSH^V*} zuTjG!JnoJQyDk-aJ$wC*qS}Zd%V#vJNXDq?9jV3Zgm>h_+LN+O&@aiw{k+CjQ&h*d YuRs0u^#0xB-yHax1AhSrLjOSgFE$m>^Z)<= literal 121414 zcmeFZWmr^e+Xf7Zfha1epkPpffOL0B=g=V_-Q6Ha4HAlUcMOemij*{nLx*$^-3)y9 zsC&Qe=h^Rf9N(|+&&zQTGi%np*1F@m&g(qS8>pO&*sU8xH_*_~Zi&AXRzO3$qJ)Nq zA#wdGcqgUk)C~>oMv;k-kes-X&?7lpD?<}=12nXk(8%a(N{VW@Z#LcN>+d{z{0*~S z+zLIzxBn8=%WK4sG4DuR_fFAMAy6q!d*ajSQ+R`_idtmICs0oV56hA8wY=PLt51&V z`Lzc=wHL=17iYV@J?>VFPKF4c!z8BLXw+}%BeR-2(O$o_eLaKDn?F+JOyG(3`OS5z z&)6foS4I_NR8_C@8z@|8O*vgQmYOajdbhuMaiJoAH>mUq8ow?T<&95Q?o*M9wbOVj zMO{WKyV;O%0X{gN(#_bTBS$6#wbG#pBG$Y{XkK~Tr)fmz6o__D` z#3xI)p=zKSm+;lY>|EyB>i+r_FD-cSieXL|-a^UIdduc4*#)g(+7pW8}PpCHix|$~iAd(HeKg08CD*XQiL5m6!&2YU88s=f4u4&F>c{z-mXfbBK`KW9jfQ zw>%@mI&NqRs$TgdNWme#-ly2oF(V>VN;OWe7j6(c6Qn85NM{s&T$)?AQ%2>-BhsSS z(kBTGj-A)E*~=I2m3enPX=jFdozbN#&GQ4u!Zkl> z#_Q~HMU_K%C=GDA|y^j9%E-evP`kVM?TF-GQv*(N1muYcJoN>JO?W5`o>Yt+N zJ2}q2tTPCm77285>5N+OCo$MVJQ~k7hh9cwp1jQemfL_yu#e9I9W8_y!&VL9Yq8FW z{`?U-(JQoDDOfkX?-vWM*;{nd_E&QKrSnYfh5!S_sdERZ??J9MD z@ln>@!O&{xO6yJC2EANWi+u8-TV2ge@len$q^@=nrSQD~>Iei|`D1Qi(_h2;~((~3JqcTb_|WccCFI^N=m zOOO{li40$Txg>sD+^XYsDf(z6cl>jTBC_c4u=n)gZXKl0V0c(Jq8X!~WSeI^NGVf5 z$WJNcD&fM|Sd!v|CSMk0C8hIbdCH~HsL^ak_)5@3bVd?IT2j@g-8E@r#YKms7&U$ z%9MwN_TO63ta;Pm-w@`RG}ZT+LFzuYD0OYJq1OGozgpHjSS^NM~Q*}=nLr> zZbMEx++C_q@y8}{z0AGL17!|_t67sVtZ9XrhypqVdZi+j#)5P4x0y59AF@hB(#7XK zP>2o++EpwH_VC0lvFMmGlueXlsQamll_SiW#|Xw|#w5!QO?BWTLj!Qk{tv6`-#M~( z6{2(33gTh4(uZY;6cVjIMcMpH>CNCNyC|Y}%@RhDrDJMBCu1kxC$>llqzqF2vf^8+ zxA3d`SHo_K`n5y6Qj*(iuUg(wC)-E+DZ784WK5|htj!g&7$ zgl!>qF?J}9mpMJoCDtGo57u4bE`|3;uLis8hY-)|A45iS@QN2OkUHDK3>!f4y#$_%@?W0<_ zBdw#=I_>(EgN1|22RYd1*fbB~9%^%`a|S&)WS??KHcUs z%-LJse(gfxd_F%qm$}(`2-|U;wOFRzo7zMM>-UN;JeaN9DcS#U%zPAVJz3RNRnQP< z^~NgIDmxjb6QL73t?zaD;`N34MJihM8*=ml?~%_xKGS+TIdgKO7savR&Eu3k&*6!| zd2m;g;Eb&0i7>%8M%<+P%yw(t0iS}6#0^>eeB`v3hx)~Ivvk#T8++S&u;aCOPfe<{ zj=pV$UUzNr35*Lo{J{Gm{t2mYtVl=tSenUzU3y-+n4Y_SkiM&)g4Hgqr;x1FVZ zZZ3W9u>E%5cKjy~nG4PA>MM4&OR>Eg%$wibdyZFjWoc7rf6%i>y^#(S&k;ZBzP+Hb z9f0cst;Gqz&}_nP!b+{Qj<=SZe@solBf0aET88N5!ON*Q?wHv~h43APZv1GdPWII- z<&4k_zpRHZPoqdaY9|YPefgFBv3z8P_!yG|6QO~lIBn*YZ%NN9^EcFlN?(nhn^U=I zJKAAiwg@fk?3w5L>So7d*Ja1|JXqV={6puHjuWDz2uog 
zD@Xkve?o1op<-67J8YJeYnUT{yc%JaTD)cZ!9reqU}@iHKbmdCf@*ThLjPM^uXJzx zfq9!nlsRH*wtV4LiG+=|&Gb;L261Ic1-_+QrAO0&BzY(~KG#dGc`jE8A&Gb=qK%2V zT{|_2(GA<=ofECG+Mx}iQQ8R){jkHNnj48hX+g4lP_G(|`*NeI1`3c;!b&!$%|VZW zQ%+CA3(a~RJFkIr`fFs@T(SL0&5Im!3kwGdN0qRKJI$`$?%}aV7H`fha^}@^x*B>6 z+_e_=oIj?=Y_uo6#XII0?2(VC+TPZ>wdemuG{?qfPEJumQPxh=ZWx+^Ye9>kz166G z;rb)vykkevG@CV%BC(ebxgj+bJELr}*7d2w=yZ&rjE9}j4WWFxe$czyv_!AH)o5M# zcF<-qt-bYQ^aNdERd>ULee1Er93(sMRwD;;sDEW*)VXIteb-@ljaG}PMrL}e#^xYv zvj?lvxw3E~$)Prn3wDY~tafrm+}OmPkZe3W@kAg8uF;U{krMDKcq;9vp4z}S7aNBU z*$)#|_c!WK$Ywj<@(N79<14*5nn=EgI4PTto%*zje(L@4?fKi`TABCFeVcJ9Nh4_MERJlakNK76@NM2hYC+H3s? zZ+B&M`AgqXU#3oXW10J{^OG%&wn{67hG4#_8;_RXd<;i(wnIZReSo&NhW36@YVZ5p zC299-!MI1V+yhQw@0ifsG|&SzG5N2#H#7h1TT7cmBKkRwp`;va5K;@on7z!3G(qiW^ExqfvvuucKk0 z6QNxJf1!hKUUcHW{}x53Lc9F){Yz+Q0VZe|zds`bexttLgKyM1zkXj1_eZ-1Ug3am z=afr-efo-0%H_ZQ#*hHV&;%5P#KpmHMSWWX14}z&EBkK{p$PB>ru9oT&;tbIsBd&} z1+p#B{)mZ^s=cbT6sNwG1)c6|D?I}`XA5goJ80a_oZwFj1AE;^&KBmDcAU;UPkw%a z6a0;OoBqk8pC7R|<9VVgE%!*s%GTf!E8Ppa7f*O^JbLtq+xE2~r-HEP@6*96o+rlk z_ST&A^iEDrbWTikR<=g;3>+LB^e-6c85wE8Cur?lEbVojX)W!Z{%Yi}c7zS=^leS7 z?MxM;)POpnF08ciZ4pZq&P+ zawg6O=4!$w7NF0-HFz0baIka#JmJ3%{m&);I92tJQ<+$q|2*}NL;ray#LmE0$jSm- z)SmZ$2JH9Ae;)jOA~!wi+5hN^UmgAPUC`6KH@NBl9yQ(@#99<>U?3lu2+M-<3Jpv% z)Ym020nn&^{YL$b=t@}_z(YghM-vwoP;y3JpS+fU2N^otetYkZov?t&-4{yGGxZM!;gmkFF(AAuEf2UD;Nkw`Rm#%}$lKkIgQ9bZ~ zo&4wI`oAgldnx~aT0pQn_m~WW>yZcJ%2ar1fd!RQW2_^u-Wn%iqSn3l?3F3!xm(0+ zmI=u1c2dtw zM+(+H?%fg7yaL0MfTOzIUb9BzLHwj$9}lZRCtjl_!#lc~oQ?8f?V$>b@l`hMRt4l+ zue^ea&gb>(rCs@AVI;AhoJ57633c3cE2muZLLPDrWl6^y(yEsggR{m%*=MhN$1r-INfYKZS>2_FAcijs_qxWf={AH7B7%` z9yJl@xNo-*_3@rQKa>*mz5lbP{2%cPmOs`c554;{uzBAg95GH0SqFF6Ma6x#)m*Vw zSUVPY_o;cZ$3e&QY0uLQX-Jk=c25jL^N^a+!$G6E4{{-9%x5d$8M`QZjd~N*Hagr7 zMvW@YcDhuE6{E3Kv>YI(Ki=Lp|MA&-sNU_cW&%Dvc7REl@apZ=TRS>>!4EigtSsI} z2H{p+oS#~NUhcy1I^C!os&O!robGyi?REv7R?X|#we-27#`B$y>7`^Z-Z36bc%aX( z!M;ic9synJIX3>UN07jm!^yRpZh#6mQN)|;Njxtzg?I9vZ3%I0eZIUAsiF-HBv5;O zxAtiEU4cfWrMylquIu-UVI8l1xb{2RiWr?lU(?*61!6vvZ)D^4gVO1z5{JRzTQJp- zX-Uq@kn?914RO?kkyKoB!A$EuSadl>%{jJ>CrHJ}Do=VH&*KH^In@_SmlU&Q$|s$s zCslI%3mgy=qlmg)87$~Vf03yn`{S0i8)qf&<*pn+mEnCSN{ zbRCcVNEWl9?mhPWbNM2%nmjY2HQ6vY3=%DeTuu*@3M+%re(GkF7fB~+gj-{HogZiG zRP23|TnfFP^Y|8pRLmQ zW%>5s;id0udm$Z8kC&1y_Teg(v=t*?;@GT`)F7D1!zs5ud-|M*F7qE^Z$M&weZAZ- z*H}3e*zA=sy3O;tcB4W=70+W; z^716{$-}{E>PbQ(Q4ygWCHNkj>S4a8k^QvkgklI~!9L9F6`MG@ z)p?CzImt_%c-l2f%_n{A9fZhGRxE{I3^>|j*DUSbzp#OR^^we?9J0e45ticS~m8#9q zDeIj`GB!UfJk!Lto=>W9(0L%zr76hLRM03^*vX+zQW(bTpp&(!RgmwNS+nN8oEktd zL8uHx8ZWk!dOc5Yo`qJQ22ID)rN*g6W1}Jmv;61hS8OO~^AQj^08YW|>;al(ZIJ>5)E8Jpl`y{G`0kUa;J# zFDW+0+I(g=Cp|QXR6*|~TE|m7-|KiGnlh?yB^amS;$*lH=k$6zKJOrOq8Xbjf9$9@ zX<~NcqbzTS^qBwAR;R}n~&QH%+KiZyS zLQ13rWNl>h@mNjO%hBXBs_+rd)zf@-7KC^a&K1CXVdpaQ=}SU!^5NvVbEjmfaVh#&e@ToVK~3JLuq#&*H~(>OctA5|og5I{J=KmKYwenV*`8ZGKkA!p*(O8;4h4=rgb{yvqTb3A^>ETiT?`(`X}Dio z!tCK^H4+uxnVK8-!lS;h@%%)vrba{Ob(Jpd7gI3py&FXz%V%d;vsY~8+T|1t-P_bs zYr}=jxGs9M(J0O7aIS1FdmK)x$bHj}{2HxkD-L-7h9M+Q971Pusr387#1{EE7S-hR z%;v{JuewAxMD@TyS$Dl}XHoi`;~bt?mwh*x(wwc-Q`979#sihgtpy32gx?j;k1G-L zn9$2S5(Qaa>jAvdODquHB#;=ckbkQfH6*^Ms>X5D<$*j@h6jn~_&W&sJWi6k)fHj> z4Ga<7nLL{$44yu}o&25(?&>PsP?I&6|BfI$!8Jh(zj9@kW7%f@N8W-&@7W}Q|I4qEpH(WQ$??jqkU6eQlMxSYboFs8 zB^Z~4%cbgcWI4M5V32NrXqDL(8L)rZMkAoErD$M264pzL$X+?x44VBWzOuJdOzYu4 zU0H{u8OV!q-Z#~rm_V2X9~Rz}l+J-Eu$|Z1ffex@aSQ2zIn2dV1|4byNd)WU$*ZFJ zgB|gKx3_uOi4Fw4#%F2QC86V6-{jJBIboS`z_U-0^AktvmTAKwLc zkUYMo@wTt2YrYZ$D_{fmMNt~Fv0tUz2-2}ze6h+a1ToIdN-b3u&xENvyYnXHhfpdx zByDFg=r(<`O1ZEFEYxgzPuXi#amCb?KYRo+kNIu$n 
zkSfT6G!T(K^lxy!A+00YiksrH24>ia4_OP9P5dx(K7Zij)Hq6EQdHT?Uk$2c+C!R_ z?cY(6U-VJZi>iY$vvjYR_Kz;QnRVNA``2ON#gD}^y+(#r1k1C8MP73H_E~G&ttsl; z`Fp5DK9Jv64}_{LM#ag&pRVX~wYsk|EHsYKLduD1Qbq{EGhI!TA<`Yn4;v9B{fo+i z`(=YNG#xK^ri%Slu8YqdvB*K3IG>U9N+&PG$XVnc7S_#S!u7Nz@MY=Ri_9Q{AP2`g z&ILd)byWRSJuoxqKh5e~ZN-HV*^;$PIgVR+a;;q4B<1A}0?#$1OY$>dJw^qr>nc$F zzXF!;=N2e#x>s<3SJfJmReqbZt?_by>Uqzgu0Bx_W4_KfCLADd*7Y_oYI%H;w;_Bx zMJv`fUAvW`l`zj87$z=PZhGvi91Mw*jjgeTH|Uqs?Bo=f%M`$E1Td?9mC)x>Hb^~R z)g~hdO@+x?aIH%&Witp5N()gf8*oWb_!FwMwg;|WaiK*uC8qm&R}P=vjK`79S;dOlGJuD2 zA!6<;3myubiFlJKy`@V2IiqihzwtE{s!@MH^P2||zO+IZuf)<|oI~}UZ)=~ud`h)o zHRos~Q-YD|aYl5RFRNkqy^FM{bL1&uFQ{Rt?v`}X)#826PR=W8;QaXNCR|hEI<-k^ zw0x`1)pU>C8c9r^a_37jT}rw+B{w3OhZt@?U@uPD=4ByBje@}KWfkjA_$ea!Z3*^mGjFS13a zCEeih3{31~PQS>sapq*L&LLDcUZObQwes17V47kwkr9L*;~RU~&TxqtGTySLqmhid zJ5RQYh1mz;_AouG{5H9S)0j!N&(v8VAuN}Vtf?%iB9-h@c>K1f^qP%D0j^ce<`<6T zsyze0@LCwf?Z&s9QFU2_#eNR^vC(CH8bvV;ZO)sk?;=0Sa6V%k+`mcBAOAvEaOS(B ztn_m%ovSUh5oqvfmJpYGvXvb9^fP20S>5@NZz?5;^AB%h4I&Dm!#$=xZpHpB(rt_a z#Ji@q*TfpR$UK-BXujiO==Ni}9Y7pA36CLvBH8Q(?3Xi2w%=6y-^@ISalZ z`Iw|UYbg__{q;L2#U}JaHYLTS$VG${OP@7xTdBkQQ(N=41u@R{N_k?wzmQ=X*!??j8{9*7$H0IQb(b4&}-8l9>U+ji=`$ z9`2-eoCOR@kqOY`0Gw)xdu_O(W3rqkFm{Ll?AcwSY5!i0uonRbpy*eMUskG{a!fvq z@f;B?NtQ6p6J%9-(-tN#Q}>WCZi}Co#k6Y8{URs51Y)gC z_7SU62Q0(1YH98iUakT!M%4FDxhy66kW_}Yw`M!=fl_OqS5sd*t4|02Gm(z~---c@ z{reW$N|ckAak9FO4)Z*RDe;Wl`77cqQ1=U8TRayF{+m%{HbpSwgQK>p>r!^ zZF5&GB<-N5rtQ@9Tfj-h(UyN#Sb4Gdi%&3?yF{i${C#2e)o*?QHLs1iFIm>9+l3XM z(m#l>Hyg@Vo8#c<#Xx0EaG0n74HGWvX2QYn>-Dd?N#k>Ues?eTu9O{p4ddZA*bc91 zDE99&?i>}NVL~jg{PFoaIfUMTWtnUXAvQuRyh71y-@6!!761B3iW0z7nL&5M{+&U` zrvaph9z73b=)WJ&ya6x^B-r_{=67|I6`Q(4~_2Pk*9}e_j~XebmaJ@m6hDnFoK}&hIe$`=tavh`z9U+rP2o zUzOaTpjJh1Ni^|GqF7C+{Fs<0a8axc|#>1rXk%s71Md=lTD5 zcBOpKcu@Ub0{g!okDvylqQo%!$5i^`*;4dCWPo`&gw_GISc0qjvhY`Odba z@(&`13u_TI6C=agZnU`PCu>DZZP-N2RJeJeb-RNl3*|u2p~GzieD9(kf$RQiBoyAz zaI%^wbp}MNWMVGU?>SQI!S|VL^+EpZmdfWGD>L~B-Rw^lvNT`nO=K#;qAioNT!*M= zfisU$@H_ShP>>vKj$1#PVpSdHwLYeYa#FK7cF|ULw-i+`##NW_xE*x2nCkGIAL&z^ zC%GXyLOj+>+8Ild-FL(-&69aO7)w@-frLQe8Lj2m5i;p@fn+KHQi-bNdSeqCoSl#7ndaZsAArv;^VRF|M=Y6|7kI2JQyigVBlogp5px!H^* zq(F7ET=yf?j)6kN-Vx9;Zo6&PJ6W2yhdyLNRgvbF<`?JSlAL@Gb!tNHGf}V|fU5On z00F>s(Gw%MGVG6W^RSPzi)KWLEh32B&bHf)^vW%@Yk@v!rw^2?QK_SdXNtD^4%3U} zjqT*mXn*CmUjOt-rAA$P{io4XD#{S%`%4_EZVKv$Pl3Fg)dMtNc#gjsWe%p$yz72=<6MOK#A!E zJZvNRiI#1b3?Fh2oGHV5{2EmgrcIN#uY*EcYR{yN9IJ=?4I}cYXz+{ZJCb{RhZA;= zK%FB$VSbtpw2~(SKna;1=RIBjxI7rV4hYm`sYc20F{5OUsAVT8UM&zTaXjKUfSY{- zN}2R>j~=L!2V2dzDNmT~Ko7-A!VFKW8jcO^#3zI=GQ>aTn++F&2TQQ+d{(iPnSPgQU~dqx+D&JlG8yGRCEpk=iS&+v#*%E2yM60SdzILGr~J<={oX&~3Hd z#U5OwEFZG0+SeF7G)mtJl8_pHj{%8pG}2+2;E)2AWG-nMZZ$>+L= zcORUilZU3!khCu=K~Xjy1*AdYN)&XLs;Q=zG|x_SlyK6RVi^Ur{>Jphxf6x57w&!~ z!<4dy1q({^nAP$;38E=1zDDBwXfWAHdV=ugV)*x1Rjpx%AvMYYK93W7uDyOCT#vW` zq;0>zHG>leuj&lLv}q3kBf7_NV5kw}Kz(KyXaw@!_|{A_>XVyq&t)>li*=j3$=lKq z+jH&XD2)_c;uT^#gsQlk5`e+uE3;dFhS?MwOsZ6PnTcZZgLqB{(>YF#-rux=DTcPo z0{$T3!;?$G9Yg+OFLPMG)3n?C36lBW0#D7@BYNU%At>A~hXuf_8Ttq5f8w4MpT%~L9166%&Tv@#FQ8ou;OZ5_eD8m+2=J*>qK{Yn^vnv%l@XFl zL^rVXb66v2T5Wz(xc|HXu)g{00qa?CDES%vzYhIupI-=&Bx7=gc`BM<4+F>TzmK$6%Fgm3T68eI0i|_ffSB7q^$%2E_->dzk0>% z)1E>uSUY2S?`RDHD-r6Xqw9wsG$o^tNtQaMT1d9^kFL5U_yb7lrNEY;9?xoV#wS7C zB4GC(4AQ|R5T*hqK@{d4$a!l4VjXSi*C!S2ByK+q5$e3|N7nJoCgFU9v^w?!A{Kx+ zF9kM;DCF@98hzuL?b<-P7@~wam(8#{hJ!a6+~$lX(Z|P?5!Lr-L&6c$K0p^r0EJrOIc| z;)em@g}ydYqCb@;IxZ$yKA^2G^HWg$D;amngQ>xxjUS5X?N6UFKf-su9o7vsZ#Z5& z044m$(=cv5l!$9s9sX8AILZw?e%!?q>Ic~k5$YhRNMrW*vrbqG5%YQ0$x;L~DDVPK zL`B28>57@A2y~U$vZ@K2WB$FcIb=BH0S+u(EbO42I6N(rObOBhGkwL9cnb2m6)uHT 
zAWr_OVry|yT~2Q97YC3E%ViQdN5S$ly+J>|K&I4n@Xh+wpSh*~9;ldD+wwX&9?M-^ z0;=eTXW|D<*GPr0rMGvx?XNE6aqj2Tk!&jq(o9{&eqanzMVM3MLK%)XECUI`)q2Ia z!f!KMwmC)i-538i_Igr*- znP&7;AFmn62k&&0u`wYJ1J4hQhP9l^fp$7EP1CBJ1LJh~o+F<{tpfSc3*09SvugOw zAdtoJiD}_#1;i0y!sC2Z|D!;E50*{b zsZtXM$aN<-1jsA)sxH2Mc$!|nwmAsK+Q_PYKT1a|tR9s`Dq0CNQ~^9lt2*+M7soQ))K z_S!3^DF_i}v`fI{u3dXz{pPmUVyuyQyhVk05m67ydeYznIfcReGLoY}&K>-a+eYfK z)pTuRL_cglF9}G3E{ zI2lDwC+~nvg1=wgDLlPIGH~EQ_xN5()ob%6a`yC*;>4{LYPIx{7c?LPO zX9pgHmg)>k`)9PrYB~QBoM%g0W;Mje?|eJb{Gl=lqS!^IUf<epc@Ey>~5hymwhdqthVf$Y$Z5HEG>ut7p?*OkmK!23chwFjPwk~?&_0Rt^M!?{ps+=mhs9E97ah&=#N>-x z+ltUE2flf=oSn)MR+DcTK1ttRQo6p=YSf%{eim7aYy96s&98r_{~nA;5>C$C14%d3 z7KDE?lC^^}dBOU44ilIqZaNs$}R_tSNlj>jfy>b?{C3**5v>L(goedCDKr` zNkvKHp4>ekl7KqA=w>tUkF0~-4?3Wcz%Am zX$3rbvgVTQFa}u`lES#QBuJh1=W>8Vj~fKuURA5KoIJBX3aiK?v@;1-WGnMt5z$EX zV^C_Y<1OzAAaVU#G3C1E?1TU&uXK<6;62}uA|V$EmoPp_ip0JzD>VCs|cOLZ`%wO)yNc_rcsLw){*ocu}idD$5-`d|jAEo9>~r`jrw#}6ti3PQwxyvIWsK<%7Vz+G2x ziXn5pFeSeK?(QB8|A9w3tUUK+k%di@UjRCdiAzC75CB%zg5{CF@dc+pz){S1HM*so z2Vx3i57eH8Feptv6#6GEu^9EHt!1#v2WTJAHmU@TQ$xP%kNS_UfNX|o2hSORCb{o) zERwvfENVFE)ci)XIy{m~Bm^k{HdbEIL4=5MMGx!v2r5^%>(zrymbNW3`7Zd(IWdvT zsbaAwetf>;W6v%8AMG#dfe);_rW!+^d2!`zw{No@J^{ikvUDr?=pHP!kNjvItWkzT z+su9g0Q)L{Tzq=$i9CrVmiZjYbD(M`E}&uCLEb~Iuz^A>pr)J(%ZBl*n3!C9XfIb5 za?}FevRq1ZN?O@0KGo|&xqgrI_%^5okw^Ima&^yh;JYey+!zzFA!=8eOE*1JE(cEC zuYgHe2r~fK<=pxE#gyCT@wOz(2F?J=WGa19Vu&i>!F?T{xw}g#-cN8@jIG$|q!|bB zX}NO4A8*U1CJQ*>Vv-%{n;An0iQ^I+CmmG0JiLx(uAKr`Wp_PQrw(7Mu@od4gY{vu z+x-Z(DVz3($fHjPa^=PFYF))eoD9$BaGt}7LDIZ7DfqVOsUm!*H)jVT;6O7(hj0wpk97q5N$RWKi zlqgrN`cnf)i1y81qV40Krk8NQ0r{l!ofC+!K?j*oxk?9xyrWpOX&{2!_Har&? zfwGj#kO+kP2R#7&V5WyQE_&z@Jw_e!;c^v$8w9J6)9beSa>dH-_Px!58Gib88)nLSi{9naVRI=gv|wLvmMu-`X#Xc9Bn)-4DQid|d4c2JW3r1RBww z9q{NyC5H37m!ASUuVWZZ~KQc=+7qvt$CN;0V*0nwD;RT)UCg2P1N!K@X`N1ohXujy1~PugFLzn zI689W6A3Bb{^AnIffK2~H?N!H&n(D4di`vFSZ4@OEoOsRA<~JQ<>sO9F8}f&fL#q> z&jP`cFy23Aj~oq(Q1SZ^@{_b+=Jy8v{_e&Y^MdlBQZVUQJ~JI%1V{;%H)(TX|3iKx zN(1BO?+j-*L?H?9u{+KSkt*h(MBVTBuG{PnZ=+ZmQ#d((3@XVF5T=+2??=^g;DDiZ(<=SHO-N{37P28%_~s7w3nK zOh`a~2G@g401X$wzFA?|6E_OVBuh3>h*KQ=&4lqeu8&Af16P(Ip|-Pb*2QXGQN@1} zVk<)h@u*1=dtQGyHSP&GUEvE*7@Hwa_J1>j2ea?!DNax(Jz`#WXFxv!lCXBteYYpp z6GB4^Dq!e2iX3~Eod5{JVD0tD0~8r(UODO5)q*nHIHEY_lZ)d%FTlSK;vmh6>MOK> zuMj++CIHv0u6xT;UQ;ac3A2M1{V4*d>W`wP-0PuOO(Lp*1Y15bY5`lqJ!jGW(5cL> zmxFR@AsaZ+l@xcLOhsu}au9Rc8w0WvWfB1DL8ZaTX6+{L#ijBiA8^whAf4d@<`WPr z4HotLL(ubBCZjQJw~a`^WE&*yqts6)pkaM`eWM_1ER#EU?&3sXsYHKs>&9=0Glg}foft6hNq^oux1q| z)p*Wo9K7O;;+{aQ#{&9nzfu=mrh9MANl-_-bsSNxa%$EI=fW82H@B;6-Cn((FJikeYb($qQE5et6 zaDcN(-L=vdE?0i(Z_EDAcwn4y2|?O;<&eH}BKbD&(IdMdP{VhGpuj3)30-a^vD+6+ z%GY}EnQ*~a=je#9Ar2jlQu)14i>l{eTdo7nB_1=G(Llm7U-H~Q4_-1KE@?P>2XuYmy42t3p5xSKd_)g^StaZrbj+KCYo9#>S;ZChO}R@Dnp;K)D( zv|u%^icHIL9gDv1{pf*)RXw-|un>(v>mcGA%xW=mgC5TP_u*#4(btd-U@@XlUaO%` zV%!E0O4c&}JS#K)6z4Ijvj$T=r^siRw&|eBpqs{h5Y=_3+_>D)oFJQ#)}E%B(QTAi zIyOJOK=fw+?fnP&iGL2S3aD}(`JS*oe|-&gvI?JIseD(syw3008Cj&BlwQWgWv<11Wl3akl7tWdM#+4Ag(Vg!LLW zhqncmtO2zUJh_k^G7XwCvZ~wmr@)nNPtg~ztTRiq8|27}r_S^QM4oUZ^~LLGOR)Kd zHCj{3X^G_1EMBrD;4D*VYnIcXmK%IpBCt1rt{uBUlm9*t9xNF#p6qoVyQ~7Ss@Z{{ z(=iy(rN#zA49XBl`%@qTN%H~gmvou51=pm~WFBnZzN?mkrl3qA>xTSVbxUjy*kn)w zMbU6WI$lVy%9P5Fz$>zorC1{pMSO>v+%MleQr{yEB7MMYv`*7$%s%muW{X~DIV2aT z8BYS~K#C=U+8fkp`%?Mm4a=KSb-6xvfR}vdbZl~%8CFeypAqRYJP(k6uacUn2qc#t z_?coY=CyF6%_||wYUH?ijrUhz%j8k2Uz9i!EKf8t8pXF!X>{B$-U8?->Rxkum2{J= zqpxx8#795}C88>RaI1hJEARezbydY7M>5@n(bV;c!1l@_&hkO?Z9cMJ@fu`d7Da)A z7|_W#zJV;kRgbwz(TM5;SOi=aNI6F42kWCy%fl-%1t9)Gd68Dh)QQQitIx)D10LdZ zt(s+(DCLC96%=s=BL9&|6R~`KS49Z5B2EKs6G_pyenE?rB#avS6+I#Uo7>GVs$jFO 
zoI!2X8UQ2j*4QflSrq&LaKM)MXWCannFDHScR;%CjiUWie)+qEm7@nZVJ#HwZJQh| zHTZAx0ELi5z&}EvV>;Ahw4<-?dPrQOjSc&-9^q^HwEBI*)qkLlzZH(}DD+puW_bJ0 zJNx^nQXLA>Z-97O4nq6TkfvSF)ir^C0VBTvTTJaGP^^@q*k*08zswBPf)B_E;ZOZf1f(Gx#z&C)FYo&Lsyav++@|N8Y8wYse3O@s@t-gdS5^(238REDXpMZKX0ygt4%1U(blDEuu zRZiGSp0@Ly4?L1N0o7$m=tHhC5Mz@$w8{5LT$dkf7$rI|c1f%_gRE%L5_tDuE{xAm z*=(#Vw*WwM-p7?=rg;WYVZ>~7hgS7%I2%A!q42bH+*=#&13U6?H*8VZ0ks<}uE=s0 zWQJnIWbHU8UOp0MwPfH#+)DlM-_CMWpm{{2oP*U}U=JLnhHq1m za=HW@V1ZE;`hkS{zy24%XHl+Py);mCIRX$o0_qA}k#4Yel+u3xzSY8exraQiWe;t? zKF*SWlq=xCe(sIx_zujBBiBgX@9>FuJsbiX3x#z!dYPkqVG=3-)ZzKl@u~pnzSnKk zM6N`wv$YlS<~D6sv{7zku~hKx+qq!Ob%lMZoS<>Z)y)^o=Xob$#BKuNksZ8L>SVXd0B(~Kmam-r^ywA~+S^zxd{?u6XBeFvrJFXnM;efb9RfRWi~X(s6f)8<{L zf^-j=SjN`dPg7hVO%-4_tWxGNKpiIxTrBIKL4?d|Jpdyw8aBAr$`7LXZrE1dtbBU~ zu)-cw_W^70aC-o1E5!t#a>Lgr??%K(J?8bB8F{jb>=g4>pl?9bhcny12`#B1ka|re z$pUdQT_3uP&Hh<7?}_8id`ETsfc_51=E*wB=6*1k^|br0bmQ$?x+$r8Fc>v9m%}Ir z3ea7|0uj#X0}y%4b%9o%+35LLIT^6twLD-Svb-Oxun`dkGX7~X_(ZpQrRBqd9guU? z?>&VG(HaK^_Lm`cy688H_Q9q>Jz%$fsXEVB`zw%^P$oHl;j~|Vud1xWq?9ux!5jUn zT>4w+b)e+;`yCj+5?XiA>v(6wq{~sT9QNdUSsXRDmSElw93M~Z3meS-FJKeXy;f6~ zvn60-pgjX9NxqI^f<668^be^PlSbIr;G(=00B&on0kcAJ;*h_xkH3af><7iAq&Uc2 zm4M{d%p-ySGEg|(5c zioqg%t^vd&MjNn8sOQ1!%Nu~a<)?oT@Y8MZS0ZA;suCmjAnyj5MbqxZT@=ity-syM z%QvnK9RLB9bYGR1rO#}L+O7iu$*fEaA`bhu3o1@3;|B_d_q=An7}3_DHn^Sv)r+Jx z0F(RxW#plvSQmq5gHdu4&{u}lK-w2bVh_D=g@Sn_`)ffRiv?hnivWHGxU9Qko!rL1}Vl7cMX)C$}>*lC2K_7nLv!g#q`@Gp#xbCafU;o2WQSZVs0 zn3`HZS$&mGb0tRKzaM60w&&*_Al8Y!0k0C> zkl9tBb~+sR1s(pRWdN5^wLhe>0jx$wdWwbAgL~l_z|l`R;05w!OdiR;hbtxoc*}Yq z#T0t8OmwNHKr#w(5*c(giPV-^z;R*KYYA+5SD2P^A@dwS=JQOD`pZ7@9E|9%v@V9{ zFTz+nE`Bt&k3_nZWk!33~$&38LC9L-`~%u>=?H{Uvnw&XdU|9BAQ zb=KTo6QqPfv5adg1`0LUH6%2JReELIbrIC7*0_a?qncN2>S&zQhb8EfZ}gD&Zh$mV zc7;&$h5a^1C59;*U)RvTbWkULKh_&f8OrUsDR&(?7SkG(@rgtt3?3jhBNR+c0pYn1 zQ-teC`N6b#A&OY|urVX;@*GKZ(RP^bb-_XQEZvuHtrO^;Js=ofH5&1p>PI8WW~s^` z&Aok1Udqq3?%Q6RSs`W~1uq1hI8+4QYML$@5_H1sZ~lz7ND%V>@b%vDSib-Jcu7T4 zA`}@VDKiO?aTi*4+{mnC@10E0jzwaMC z9^}5S>%7kE9M9)*9M9t%P^>Y5?A;*KXDwxeOj>|*y&qjj9E6QJDh6NzMX)^_?W)KU>npWZxeG!og65ERa!)8HHFgrnl07y)ftRl z`~GKd? 
z@F;$m2?i9nn3A#&Nc$N&ksgL$8Oj~1m)ca1ra8aU_(F66!m4&51$A4k+tV?xNktC1 z-QSC+=N%(;m{iA{T@Y`yTVGtD``NRqY>*y!F6VRkn|j32gKb&TueDTP(jxF<0mx20 z7r!PIIKv-o{hW8_G$#G^-%f;?aj4!1cDu>zo2oHc`$1A*auH*@yM?eA8X5OT>1=SZu@-@I@~wGPk8W3) z|KgNaUN&}2_0>%;!fdRg2XAj_wwAX#hdLUn4!Jdkge;9zF9|p?yMtD0L~%x;P+ci; z_6JnDUtWt1eweZgd*0i?cM1p>FaZ-IxrnZB>Saem-+?aDBCeY`(^ADwO2B>avNV+-VDQDR0lN56Wn z@s`CGVqUP3XxPYz6CF)Sic)S1p9_@fk~#9|`^*`8AK5VDnkx>ix5nSCV%i-X3>ADv z!g?|Q!Jppfm?E77LB>xqACT@S?!00|jC{uG+0u25SvmNk7_=0!&%H zkOXaX?G1@1kfRj@%4suu!ERv?Ds>kb5EsWhJD(^Wz36GL3oITYJw;qpdWwwIj#Hm-lCo zZ%b$fAXgT~sL1KO;?AV9Cu23fEbEgWU9=s2)3o_pgUeR)$fAVLV4PZ8 zynv}8&;aQ!|1lcxe#mzhAowhVWV?H7ZNUKI8;45@T!_q@{Diw`<#z;e1K{nz&j*rD z(%t?_UxuMXsYrb!*e@9<0Evvm2(&}Xq;l6fQNwsyybx%J#8N{*gVuy+j&hX!des~y zc)PtEVDkQwxr}fiXngO~k(vM^k%_P&OYFkw2Y9GDPe+;Uis(8(Bf213Q-)*+1bHQF zka&gem8~ENG9*+&WT^}K+rJg%wbUjTfPjF^yFxNrNDz@-gM{@N%z3FR)?kBE2*9R5 z`#z)|NG){;&?%rCH3S)A!z#cuEs!S_itcxaa~J_1WDU0i1$-VzZ28-TLC!Y-ybdrh zNETve_-ZtW6Iq>u~G0-j);+FgH2G*bzPymn%g(Uru$%M>+ zp~ut$>?_i%9N_s&(d1JO4}K%NKX}{I_{ttocP&0N z0xwOkDA3r}IULXkC6^Vh4DXH$@fIMKJxU9WjQf9Rgn1aeL2s?zHVyW|)_XwPsE2N- zS@$vWS_4wo`nBYf_JTl;bRC)PwRPn0k)12&kSnEk1%Xp8}vE zJ)q>iu>?LoipKMD(t?3+tscVlAtt&Gr7OW%O<#WkRau-(bKANJuK)P|ui{13{08ru z8=2I%uNaZN8G~v6l;Uh8y#yX)G9Q5wfcqG}F(#KNQSXCw4DT5XJ{h_JL=Fc>UKWtC zI?vUrg?S4!DxE@=BBRBfbiU)ZqpUm2?8)q|bUT`Tq#gh*GdzXrV;-{1N*&t;5Vy2^ zRj`sYE?cS{R9Y94((YtG6G=T+vDyGF*tdVbb_=3Qb4{dTTEY;PJPS?4um$Rz#{Iv9 zZh(^QD(@P|;lR?#rol1Weq?B5j3Av}xAFjQ$=YjReY2F_rmhOZ?*-lULn4#{5&H^#VGJ`Audq=8K! zRG$m2dFsL~cpX-I{d66m<_7S16lfu*k!6G50K zTEWKK^SOR0-?@gC-^rVmKtW8R$Zo!r;w{W9Y{40!N<$*HXm8tXIr6y0cFWt}-!K>k zq7@!mrsn(teFfC-+|#6(M5~@Ycb#6YD#dGBbXiYFN-8iX2sPgGm?5*1djPU!udMas!Dit}#%7zhK86w$K z{N{ztbihOdv<6X@P%TqTgA`18MjcXM>U=ib8OjOgV zbzJ}C_LbQFa~YSW{=tfno%KAZGuU2xf4+FI{We*-noB0PSiL#Xe~}P-qNF#1`LDg( z>KcjqqrmlETeZbtquA?-aS3yO@kr51>o@&LcLwOLEgyf4*`-I$*JT2+MWN< zK4Zu|_ms;)cb2#m%*|su+&fldlH$bhFu6NZ*#7ST-^a;up62d*GqG?r<{gVHHi$&l++pzye@0)vd@fM@^**fWr4 zpPmoZiOL2&W&%;&hxt&q0H=oj4J$q1y+ zfi)+HHBG>D;o6KT5VX|JPE_mino$Tm#h5-K^Qw^(YIWxI`4K%5VfMe2FJ90I%4f;$ z09o+*GYA)rKo-bjF)0y!RW1i{-ppd~y;VIKX`>!3(YYM$v{lcc_}B>Hp_ZVEGzZ9O;G_%-%+!E>SB0(kdAM4@8i zoU7A{$XeWgOH;NXy7L!0}5oR39+VX19^IyfTIz6D*TKFjD3g^ zI@zYc@cci66*NW%9WNUk9v~Lef{*YSx2Ga3NC$uqXkGvmV<`~x$OWM^-cDeiIzq2G zPG~*9bH2sxD^e4u1v0oC5GHNLr!~7_XnRILOJW4j4fly@&btb!p1#HW-HgNk$J=Nq? 
zV1xtI5~M?@lqeeEL9+pQ{sI6$E7<@76$TD%#Ce6^(DI@iuz>MU094xug|cq!bjWwI zM|k?T?RGTY9Vnl7yOWX9y*<#Ida#ohEEJ9muNIYsktRSCS`rBbn`;R1QvvaK z7Z~EsBz5k$$A8D{u`$Ln+t^O`A`l83;fxV4!Cv`{m!!z4R*pb}I$jgw`5_$Xn zu9x4wVbPd}J0fcGA|_+!41Z%mo_Um4%HU4)3=RYnlcLc~p zw=fYxVd+Zeq2qb8M^(QM?mHh<3{`V{8p?s?W%{oI9%PKg7~*f?<|rAzsk%Rpg=nF@ zjPw~)eaTi46OrJq#U>>F=!b}(6zBQr$qpAs@i~8wr6NqCAOCE!eqpq_$+!0x(ItD4 zZnIFJ5!t&B4v(pWP7jieAxm?If!DlW#N75O_19|stSP9Y70|{Ey9@Jwnu(D9Iubb~9zPcJAF;f|l`B7pUiH8hMz^%t=L>0I~Gy z!yj4Yt2%GI>2&Orr_gFFS7K*9RNu5H_RK;ub}gAyR$<0VeZ4wc*JkN!GRAYL4bL5h zQR4My)o#_W_EH+o0MIV*>YkT~YzN69!ium+8*~%4PQC6H;cF zr|%T+P71DN`XD}~?G>xH%KeMc`5SF`w&-L~Ncj{bc(s+S*^pwM-Nr1YrH}{6U`b9` zD93`%dt_R5g;{$@DZSksP2rhXw`@g>mJAC%WW^!3+{o>4cV}t$ORP%uikc9`Crk6EB~37kdegUt3Gg;S^0eAbkl0;CC)tHUn#%8ZP_SU znh7JtU~;;8qprjAgCL!W#8h6#du9TR_?WdwS@UY zCG1kpMUUCu~AiAziP~mkAN9le!3}n?c;8uE)yiKxWJim)qxy(0^S&vo;h$ zhCz5%UMG-^e@{}+?1YVwMfmpLz3XaDhQwHzZYe34)Is0y@jHiHm(R9o4qu!}kVZ4l zCdkb=`ShevNLMpo#Jtej$=-ndiM3zdh)Eu8m69zdf|OuCe}}~kn`%Tu3Co;Sj~ZpD z?qElm0#uHSXL%STSUf7+utjnw<+LS}{3vEsv|2M4>TZKw5w zkHz@$%^=x12*|~6Z0(ip7v2AXBuJCY5|UH?;H1Sy&H3D|n38eRu?Rmm(zKdTHbh(^ zT%36TT4Cgc?p$Kc`iwU$p2+Sxvpv)pYX~ZwyKf*E3kZBcpq)I&dc>*T6p@ zHyW|g*fDd{VlUQ*z%U^i6g-~Av+}~}D^`JJPT~BgXreR3ZV9h1U$dxJvWH9);dTlj zpC8OjviJbt{QD2@aPJBb@R+gy*b5zxTj#=K~JZ z)E6-xmi2+6TFD+qWi&X2sZODq8Ogo9Dxu7PjEPLR0<+cwP~0op^wVrY8T#nCWR1Et zkRW?tNuwd1)c^i+rM9jsT?*2xVn*$dy{PIl7%*zXVMe1^|_B32=IV`G**8yqSSg}wx`qi z<_l4W@-jWeKGh9COyaG)A|dchlOKNz^u{{1=)XusKFZM#`+U4Fh#-l%jc14N0EKdi zbyX?=7}}lH@cYoCO$1D&TXZp|y?@ZsX8?FBwfJf zdF}VQ0Vkz8UFDMwu%G0L-$E%q)R+|J8}@Z5DC;Fq^`!qiquY)0fIY|M7Yc zB@J;tNYs)QeaHVw~U8OCT+8dx*wh+|bTe906f-#~V*gd4cfo zdnhh0TatIEYcQl>vUZ5gspf|FUEE4jB)+)eWRygj}4#Gp7MD*Mz|`?QkAnUraY59P`_?k~x<-8WGoH@VY~Nq$O& zlQpRk71hJUBs!y#A1UuWI73Z%_q_{!oV=Kr|A@nsP3w(qj?m`NK0%!|!Pd^j#d$uR zma-Np|K0h%^A_$K)#?@N2b$yfs>=6_mv%o_&@3kn;Uv$MBrA7Zu%lu)& z>5ksJckhQq^mYWSoPxGP;M6ld0fBrF;uC5jc$x$?`tUFwmWi$CUrFHqOp{ZDt2mtd z6pLE1c|sFdN4$-iscBfvJLfPl@^&nS?0-J^_fJpp`T6-jm6ob(ZEq*!=VQ|{X4tso z4H{Z&|8C-EZdVS+8F6z!MjHX#<&d^*kW-zb-TmX9nSrl*QXcp5yqcQY4LBXbGzgP< zq*^S+frfAEFNuzeCLUN`1i&IGMho;YQn;j99)J6HYr2A6yj}Gb?zy12Ss3b_{-Q)= zG~ubKP9F+C*kO}HKl?L^zkr-D2MX%LaIvOeA|oTm4BJ5D_efk^e5_NT;LJHqq+zq7 z^czq3DAq^7dgpm+np!E3b!inxU+;o`RnaciJaF~N_L)MXp~@ofwTs53${GHQbB-A= zb~>DzQ}T^P_8IeaRm?P~FEDKir!t>ELgyDylu5)k)nP_jI1;$_r0%+pW|n`?2g$x- ziz$yfyXzv@#PhSiSK`sY&meg?v$toUuWybS0R^Z+Z|3L)j1-QAC^BV!$dFmw9YEjKgJAQu13SVzJI5#wjXp&2+>ntc4t1HDeVz@SY$X)f96Z3$N zM3bJQp`o$w&W#G(Uhhg0vND9Cma%cgise8}N}#ZM5nrCQ;L07wdD&v#(u8iNrDH|% zr4QXy68f=LfvbIWZNK|RpImB|lR|JZZXMU_EwmrCUIs@urDjk8iLhNgo~hu-E}+hA z*0N=g;s0K0=mcAxy=r}?>r($UhYbIvl_hRo^=QN;Yw+hS!L_KRcRC+w0!$p|RK!Gn z0S+C+x({2kINdgAnY^8#yZE z3;Y?p%^*I_l-zmKtX7?806pQ2?ScK$r z=BL0~BQB@pu12AI-O_oT2-%$Z?={}u4dBC{`l1ivr;^i*r=UucnR$Qcr&HmJzKV*9 zxSZ1hzfko#berGqdo|2@!SKjV-swmX7Y|a)el8~42kT$Xi@sKeNiH!ht9_)DC2kpR z)G8iC%ub>xub_xXJ;WyVSA_R-U-Y>XQBnjtjuB}nz@Rc#F&&f!)hOUM@*7`>9jj z%3eNcd7+Wbu8GF%>ls_=<8;Q~z>R6$bVx5L@(PlE!g|S-DZ!ZO^GMyI@r&?PwrLxk{d_jlaba@~0{MxNy@ioW2BN2E1+Icx)^eENkpv$@ zivLk;4%bk6N9KK@5P>`+fzV3#%xjyy6^- z%l^}tUlnVL+iFGlbh8gg&i+V0g@>M*nW)XrQ|Sm{z=7OViVoiuQ#Qe7Qm?Y_DZF#%)l&hJNFf3|G@;O0cIEw@ zDPavKQ29)ypT)tUMXlj26X)##xcC?;yg0XL+f>Mg)!Q|5 zP1`HOS8deC%%P&|!LMh=!}>;Y1MI59YXyb%HWe$8W!Qej1&c9rkkNa>x9xWMHsdJE zubSY9c?anc_W@{lj4%^;v*TZkm~O>Yi`7ZhWtoNwJ6B9+pI2Muj?MI`)&dpw*vJd_ zIo#BzinX{#-pWnMO~3sOKV&Ra1mU=8)Zp!~A2df()Gx4l0v;j&km&0s`Fjyvnpvi{ zo#Dfm2MUZDRnqJ7Xb0LSpt2(XAVn1(!(RHTpiE-FU(YPBZ*JpKt0Iiu#LWH#>b4AV*Pq3#qPlq1nc{=L=L z-lKC#_!!)7$J3t>fWQuE$|;jRP!;mkmn>Nd&xaYJ1(ZKt!te{y#4Fb%R0ssF$-FF^ 
z$xhU0?#jFxE+#@tmKSCVv<2}@HbsP=tU{(b+ZNVgS6_hWHeZ z{&f8KaWQFGGS!=pL7YHU`P_>Umq)(E7H*z2@zAXEEl;!J9@)evFhovHbZL3#?ZM}k zg6+~#!Cj4s#dsv!rOjk3ZlGUiCw7mgzH-b6a-p@@vTxnVXlw}opud?-gXpwe=LtxX zYw5(x3*JOksXGhI{)yF>4R8arvWLjL0%`Wo;`goMZpH}81JWb3?_ z?RHD|k6Bz8(e$Ti$S>-_|51OKveOlvs!$Ir*b9*2=2*@=&J7#D$0v{{Lw$mCy^<>q zx$5JZ`!3<#i!t@sfH2KmLxe}GNbHcS<06lm6Il~&=wzjo95|nzj7JJvpX5)KHmDk)EkAvr*+zR#s|vb1eggPo}Q_N&kEfB zsEfq#GLB)yXtczh!<=|4+d+ayAg@ACm8@VtV0ijBnx%Y#B)R)>^lx6V>+w^V(V)z@ z@}%4@KTRn+E=aB3C>+IopK~k{+$109=p;?+?W`)FtYF8-RQO)ly?4m9>^#Qwcf>3s z4Y$GF?4Lh4(g7up6np(=NdQlo|Ec^=oC~WrO zSO~{SG+fSOmeQ{tIN-f!gXr7Phspk+Vv0oLa}A2y>F2eFF%1UDTu;u1SLk!xrTKu1 z?2>Rn`@VazxS9mq18KFxXRk@}b_3`EH)GggMDf zMlX}zw3@;Vl)@?h;({r9yz0x;i@8*H2nUI?5EoW8J zfk`8A)z?!02zjt4j7=JN8p_8n*2j|l?;Rj!0f}!6j6+H<=>wz8~$Mp2|{bOS< z>PUpdh7HTWI;0~G`nmTxaMOm*H~moO;UR8ZPrDa$0?*xb7EauwN2L7Z1x;7}sJ>z% z{`c@^?Q!guz6&#iR97CL$$p|Vl|uGRGIzg~s2_*%2re^UHyIWHsf$Xra1&@CM8`J7 zKL*#5oKuXsE`!*dPDn!xUS#zWD&!9C(m`&@EhA}@Iag|hhuxxeUZXv1auhMoQymFr zeR2t)j|RcXQ$QrM#6%l|+2~NQ&$OW##D2-X@EleVeJ2k3yA%&VnZSjC!bS-6U$n=G zNm+hGaqnG!_wJqBlvN}Qi@f~&n#^gXhriz!%y9vD{FhQ6@6r>)4>#rY$Xs7SKQ2XF zbw^&|do@=8RwY^h@&P}-*AXCKk($5is=xE>+`%R|d0k>*VZleEJsXD7@6P|v_fb@E zA-&{|D|k!ruEzOfPaqFVUy8?m41-Cpj8BpXy+lO8`ii1XYAvXB$7~lh^PDx*UpY!?l z$fne0e@{iwz=`%__37y8 zVG8Fpp(xKvBl3ZTOkI!a0 z@1`PNMPr;BU&*AuLC$2By6~7U%0+qqJ7wj2V9KPNmII>QgpZG7(;TniVf+0K3;v$k zpREp49zpjse_!)Ek1##g3>W1pyuDQOM(c`|LA4X!lr zB?<37gfJ#dQNwQqFiLLKq)gXLCtRvIIOI)~pF;8o;jMPR<=G{AzQenpXYEI}qg25N z(;OJmZ*Yo?h0mO)O#28&G1G-Ik(k-hm^Q}W)B6G}dXgq60{PC|1gI*0AWOgHq4)(w zd`e9XAS(omdA7bfN1XYxhUJkp4q(k-fJ7PqEe)|B=8-7}u~an*hgd=9pC<-Pj!69% z9w(IWZnjb`coLpN%#PbFRsxA!Equ(Ap1UHs(sE#?17jL5IHvMJqP}w1h^7=t=*bIJ zd`?sx#Xte^35{`3W%|RuzRURb<_r5T)Wh^hN@vkJa12A$GUIufe-6vqj>7qivwr{+ z=a3}xLVOILm;B-&C=ud>2OhEEV-n<*{=w472&YF#`1KM}QXX1Xd$AC?>U%MqDRX^_ zEXQSJ1Wv5Jq0V<=W-a$$X9i4eZGEBJGaalrR{5rm9cp@1H|b?)3LOTS5L0E<@H=n9 zQrBR>u@vqhpDPjyLGw~XP|z0&-6BV22-x_aA*k+%FuDHvr9>UyM%ZRApe_@GTSz5h zs82#$15_GV>~@W};TS`VXx7#l05ot@BNjq^TTmfMmQoUuavlA6s+i|Bid%64=#SGq z$6dFSOk*=S%h9d$GR=WwQp^nZn;87p@4eSs(i-OzGHl&pt2!LX)76~c_fuFXmjoV%+L4?(LS?^jDu$e=1 zCm)w@yopj-so`)kQ{Pn8tfJX{u?hkpBnS57Qp;ZeMHr*NnN3fj)z~auOAq~D_f%iK zSG4r_0mKYjKb@MT2c`vsRf)_8SLXs#$CUk78XnJv5G2L0a#c<_(#RkP*c7SNX!a7H zk)282$!qN^Wfi~NIU-imlm?7a{G#M!TeVV?^g>`KDf_|5LAYxX$!azQ9b!N21nL~O zDG_vMaQVE}*flbz_0o}gP#;C7CL}rC0&Q@0T?w1f2rDcga}z+|oRTgk!M`%rgkFhY zDYSE6nufR{^5R}$<*wvGcHQ!j+ATX4f<*qJz?v`L zWJvX*n;(+IPie=_d4bFOBg^g#cDtQf&iEv{>)y1D07c~4DzzJt4)B|fqF(OIuo^8S z(bDltwVPZyYyD_lq?J5CL&J{%B`I6G|Bg`UW!CYwc-_3Y!!Mzi1u*I?N*AL!xK3XMRS#^y(8 zT3$HubB?u!{#L7zsBk$4E16?j=IO|1szm~@&1(L1{An-V?NgB$$0biRK*0WhqMKtU zR7Qz?YlBE*v7H$wW)5BA;!Z_>o5Km^1Zh`ynCBt z1Le%RfoczRw6gNRaG^fsQEDv64+3`6j_ zsq8Q~Lzn{zjdWsZ@*7fH=PlcP8V4V#?`DaY@(TES5TL#CSVR2$l>FB7Q-Zl$t|@ zecd3Krk>MLx_*Q^lf!IIk6y+2q6{0;7rWm()8N@6D9HIzsj+HPgsj;+ABvW0slP07 zIs4US&L}{fSXldd7)Iw&yotLNC&C_-Cs%ADJADWds}SH^elrqcE7Cfm6j4n1*JZFP z`m;${;DxMqZdz~jOk`hjWm!CzoYk7%tKUvv?-oMU6jBhc^*ZyApWXvq?Ju)DR#(EGypp5(0sq_e>3ZHxEvAyl z`GQwOo)hkTb{Wm1c+WLU)Q@yd^HIce4ZX@#o?7T(65g!O9!(wAta2}}wyT;UHQ!## z_H%@|y)!C|oW1JkJ9De9S6a(>h$K5C>NvKn1?R_OF+XGE?7ZJAa$4|Xp=&h?EnL=S z4PF%mWqkp55@C$DcJCNdZ#{GIjjiBW^{biAPT76HFGHN6Kv!`L*W2EspCYEybu205 z8%|cIW474es=comEX^APlO|TV1^Tpd25!{3QO%@1p$SL)>Q_7Z#89d?y)%Ux zWc!9EHb{~&WffJt&s;oik+HR-&?4p2%PG>1%soNJcuMf6-ZZ7F6q{=mv2R6tFlcYv z8C1WyjEBypDUaz86*qOqsR~!@;WDSZY;0_%x}7JvqU+NOi{)ktDmI%>c zsLWYocUIA&^*%8YMh)5#w-5!7?qn5z=H9Y0B%?i(OyQNV(%UrbYaxr#jlpEbPsbfD z0GCDLd9`a#pfypoRZIX-g3QPd|FUqKVPHvj_`ZVy0kTEjWt0F|>HtYSZ}CR2EA5>67wmNDB@v*+Jk*U8=np7r 
z-WX2z=q;H}YD-i#J^aaEt*4WI+EzAxTU#4T9<_Lfy6xAuyz`OuDmhD3>0K7IGZk|w zR~(d@<}x}^Xr01)!J~JzinH54OH)SH7}l~B#i-`JT&!?%dD`&lO!6ep&8kQwLo9Z3 z7^fZ~5Wg?X+6zire73L7CH_V!2}^G+QXRX+bh5*i`CnId#U(kltu*=C_;S-7K@09R zog5skwoR~(wE7LuyikCYey{BqCFQAoE6^CB&;QDG&nER zxzN(Pe7<~?Kz`pj22=HhkbE_@J|PGRsuXMSG0H8f4;~(=sIGB97t2p8D)B6i>zc;q z-i>!Wo#8-N7^*bE+!sW6<=wbw;pAZAmX)cTb=>(^?NW&Cau-| zzmQ#vO0KA1(k(7|?gzY@1yHG0HZDHUwH8n5gwcw@>=r}ezj-DOSi6bEIlh1Z_3yA} z`-Qp4NJqyAd)MAR*4!#C=$D?NsBhT3r)w_^8&`Gq^|?5R)DbN7K&?Mo;j%{@LwDVG zo%rnfdK|x?C1jeXhpl#uD|gps=|AO>YbPh-i|0bNU*W>xs=vD9Vu!XN?pZyt&fnM& zF(NJE3IV3y!f^R*n-5HCHh|erv4_P^nY=s3WhaE*Ey;E=asqRw;-_~+{1J+4%suAj zNgt1t{={Pi+E-H5b?fj+sa{O;N#N!R+xbDMv|jW&g^L;YiX)+V@G&{AT2(uIQh{Ug zV=bwTirpD%3tEYVfG88$N`j<#?H+ue#sw3 z=(y}@bZ>Ld9`}16Su zJ5^EX{TND<8_v@+GsJO}*0a;o-_%qkHK-@^)9T`my|sI%0l}Q@lr;8+vNT6WqPNr2 zxbd`SiF@8_ooSs@M8ZmME7(`LF_dN<(ZqJnabjN{kR1vanj61QYx^P&LoTdQU(lp? zJx|)MX}a~Apa(Fglu_NH3plGJx|`Fjk8(ww*8slDfclbOBCPps(OT_9%3@y(clpxB z<6GLtrJOTn`5hfaIRvx|rmE8`4s&WI6*kFOJ(Hh4fWIz+8DtW92){ak9I~LOaoP^~=eXh?cBwF<~v<-1KG( zvY}Z6Ig!%04Kpjs8H48FrajT$CRF=mwaC|)chU2rSwA}~nT-m+c(2<)dqrRg9j=X} z>^eO=`hE{=H?2W9i$iyD|J9NLm?OTypV*j$-W2Fj#1ES1>gO8sQVYSma8wjs zJtYhg#l-AP4Z1@WmcABN>~*dZ>Go}YvUR|WS!Oe$8qLbuRCW08RVo-$O)oC8pZH)I zQ5XI}ceZ=EyAXvIin6*3)l!B0B$ev~o;Nqo#fyx9O&P(jl|0&u<# zDSi`=ey1bA&~7A3-5JeTb?xNk4b(j;zKOGXy1H+K z7AAI|-mAdpl4iU$^x&ny1?Qb50KO3M$KY;zw3yw_(WLJLetf&Os}WNI_raRG*KBTc z?Gvp%GK018r()KOc&DLi9T}^kq-~Wot3R{oWddVkVQ7vt7|yCM|U zCS>&r?WaIeDA~e4$7+j9)licaJRvRhH_jX|V6T4wj(6q4xY%j`IX(S;anX3@=g;8o z?rx-o^(V1s&Ge`Me-C*8q{XF+QTOQup}STM_0GE%K?{GQu@C`pAC7#P?YM3Iq`)wg zRXOjfL22;x9&9wT@A@(_*nwD*die|^YE$Ip!zq&Q)oTctzCQwPw)h8;B+J7ne9YOv z@V*S%@mn+nV+GZX5Tm)zC1Sz?KfDkoN#=q(^HZBz_gn+oD9Ph>OBXVpBTosG{#Ib6 zx$OC*C83mawSBl8RrB=&4f}E!1?tM9=Asz+;JZIq^&qOpT!j%XX2|1?a)oanCOCa2 z8To;@t?%;h`KE=x+JE|g$WEKMMF5vsY~H2%uvOKhb6o2s;{XTfAaKcMUyZ8DV*{`F6_!iEbwIsLIVfV&*X&9;7Ixc)TcRiIQ=J}dtxi~AK(G<;d%A#PZJzyDKVA+M8I-6PUueja*DsLcG1Fp7n72VIRS zfY#@7N6ix>7eqM#%8%VBPrB^NoQW_#|InB`2S*mma2+N~HkZYO5g(@h{Y)R=tvS4; z?$iI9#gfy<_c$7bH8u#vtQ~2fOxz zqMqp8d7a3u*V(5$U#Vq?0Hk3Byh#p7@cBPJ4gRePFuniO(o&v^is}|b+)%aUJFwL8&ssy9NuNoAgpK^2MR)2=Ur<=6TND=vVB$SOUK@52Rv#*+l7u!kTV+&>i3}``@!IqSpuyyYc1v z|4UAQYK^snx`#*P!rnz@ZU;d`l@z&26^ooES_r*0Y2SPRJD_$nrY`d3j}2KW6s4W^;*nj53!U&pR0TJI*k( zX`zmN`5S}Z#}_F#d#^U4T>Nvk`6KUh%K~2c@mrbXK>i zFhR?p{b_wguJCjyC`$+sd{MRpE%o8(#A*oZqjSLPKI)Mkb}uTR6r@PF|}&SLoWnEsY@v(hgM zCqp>6UGt79*>F#t`#y7DadpYKV8*8*`uVG~ip2$18sFKg_*gUxQwLICn8mjCdp-&> zPfX}eRU6bQ>rs3*fNPt&Tezg|8CVvj_Ody_HsV*9{qj^UOWj+)Og@X9qbB#NqxbK` z=qs50o&qQ}0>tfp?B{1tFk59;^4zust1x#3_(-QjoE4P#)n-Xo@yU}J754j}*A`3q z%QfwhrTG>f;CSA8rg9sTx0-nu#x2vQQ>vZFJxtr{0`X%1aZEZ3Ow^yA7O_wfUT1J(u z-B4K4_sG9l0wsd4gz{`*Nq@W#T26$ZaQT^z>8fC$*|HYkdl0A5%bZ3;O;Bp`v7$n# zugo^LDV#Tx)+0`T`$3HUL&+F@!UtCPtgtwS7Lrt%0BL9)Y9 zz5!6+@+9x0Drn?4f*2SWIV zI&AxPf)19n2k-3e=b8&104=&4qwM}TIr%R!NYIh*-(CMd_TDuS;?C2bsT_5(qpv>v2yQq(lirkOxWkudYzzO&v^)U4aZs;m+7{JKo z*o!$YkJi_=a~FOSxUl%HyJP|G(Zc`1pe!yA=!KALROeF(q}oDRB+q{Wq7EO5YW#(z zKb%2W5pZiZqcwUU&qRyB7dytzK%1eruP^#zY_Dx%(GP=7CsqRleMJIagG2&@`xYL& zQ8B{o#^iV@dpi~P_lYX|to;>V1?4p5Ku)%3lrKH+W}*<}0Z+qEZtWq#bf7C+NIi-3 zU&UN#o%<||jE{lrP{GvH^sO6z&$i=PHA#;pG=i!F>I6QZMY!VPpFarEnYRp#Y=2%v zz`cF{T861P%Br^N^?gQS&tkPH5#VQQgwAz#c#+Nj^fFmZ@iH?rd%L??Ra8{Y2?~B{ zABECNm>ND;{vY@(K8B8=`&hLsn*PAen&bGGiP|U%`ezFs|9B}6;xYM2+N@Cfm{~v_B`_Pibvhydw1Xjw zIIDWX1tvz%ONuLfMfk>Vkil*3UK03ggr&p$$IZ0Ftc5D!tZ9(VNEP>}4*Mtc#mmdvxx!7t1eak%jOl;t3Zu>j_VlPjizg~On~RQ~-pS(2Pk5mI zd#XLKec|w?`%(Rknw3 z{qsfYs;h?m@wD4|;O3Ex{@;K9qj-E%XP`Da=yL5EDlsW3tDr!|%iDWG$K@$UAZ9Y) 
zg$Jv_A{CZ$3HHFSN$DAxxU)wLE|Kf7qBHJ?QKA>|jZo+KZy+UHtBV3H`6uN&CC(7x ziL}^{4db4AE;sn9DtbFNoWo>2jv_nqa%qV#5EFbS_HvwPRG(Uw3jRkZecm`{;G~vN zYxv3tHndo{e1Q@<+zsFHKAoA^x|rtrHA8F4K7MV@4fZkayA9-T!9JbS3lRXu{>6he zwZ3@nA0i1ydq>?6EXboqrM%9VEAt@Ki*rV3jweoy-uhT z?wjfX{yPw8HsfG^(gI>6#pT}j7bC+0=bD>CTwGo6`?X!>SDu`Iby&OYA^Sf)c_OV# z0NpJ+EpB`-oFZ5M_)kpkAI$`Xo?Bh&@x(=*!-tU44z?w@po8pa@dcP#Ie~WdArLM! zL&qs)R_CbL1+bOSzqOU$#o51c&1YqWYSW?oE9LyV+it+ec=meHFhoEdG2EUw7BaUAX5=*}uzEet)`o@S$75g`mu^`8sV- zFY@Rs7M_M`zwIXuyR@{l8L(QvSuZ@-zed{MJyVFmw9dyMWjP@BS7#321Z5}@#p`~5 zj>rQbp>rK-J?gkOaTl_nBZOJF3E_Tw{O0Qb_kP60sm9x);m<7^HE-WOXzC8eKSD?1 zg`<2@GXm?M^D5b+|8|E@4AX_RtlUMpp0lQV+8CJpQpP;423i5D7P&Jg5o<@if;_`^ z&*9!R$l3c<_`@`a!#)4kCIqpT<}a&xH_6P>E{D}ws6Zxgi@kLR*Fg9M0I=cQmwt{j ziC=iApVkRCI;>R*y@;aN)9YcVvet1MvcM29d;B}qI74>@EVaH)V@Ky+*f?_vX~8Ib z4!INa;$+Mu`kx+a;Ne3HqqPRvdw^UAi!XbSYb5}e|ChT}?9Y9Inm_qfUtiy1V4c+a zxr+PW;8-`s9U#aSgN&43snx?vN-f%)YT@>x=gv{34!CwEivrUBXnlq$C!}5naT!07 z?ZcxTSEs>c!?z#@DGH9<^If_>oh(atVBr3nc6MZq{iCC!zopiE%+0aY7hUoHY(w}M z;yb;#_EUf`Ug6Z4YWFX(r!=#hZxfKzJb!vgj#%qf`r`O`BgrVHCmBja9DI=Jh@2wb z%SmA)XRZa+qk^;fUkIA7Q=KTVzJrg^o^*3JWo%xIAeC%G7Dm>Z;w5nj^hw@pc-YCd zxHeEj(X66?IRuhBG@~Jz{UI~*!Z8!pp<5s^HXp6={K}oJ+xvsGZ?Et^Wp`>M8yZ|$ z{0ECUS+SJQU`9~Zkcq2$0tWE(JOw_kvUczJ!9*g+wJrZmQ`*-$@xWkCPezd*^wT=t zhZKY2DiouRNViW>oeX)f`vNzSSwlyw6>R1>ar{RzUuZ~2Ler%;_Sd59-@PU?`I3!4$hHsrVRU za+bX!wmH6GW+qoqP*7e|Gx{(osc4BiG7UZnH(~p?H*qEqlPYfaf2AW41&c%22c%XH zM%dKC0|kvK@vvWAJjh^q4WRO?S+2lQF)4f}&t?>0s4tvJLdLz`y0J%N2|{Y#o5d;cEkc7rUgiaReNauVvK zb3Z3YKTYd$nhUYnLGj)EpZC9 zcC&Qm)>}wuO3r62kmHV$Aw1|88jo=;?&idR9kSs%-)bI%a%EuVdt5f65&po%%wDx7}%WB_W06Yi!_V2uA1g6Pti!{@&p`|0PZwDo!A|bL(T*Vd zr|s%9;Z-+%cdU5!WlTsO7OJ#6k}?in@8gQ;tefpFx}XR&8K0sS?By4e_Cp3 zweiR__ik+C>;Dc*{qwx@x5gNtQC!^IfUqPC;U($WF0e2KgYEyv2EzZl$~Y2QzQ>Z@ zYaV~9YVXychkl|NmOZ8FpNE`!dQ4sGf%eBu70=^}-^ka^GGUd1+ha#VRg; zDUJeK#3&(04SHOzFqL3gH>kE!@A@Zy@ zE&{0O!-?O^8UC7p`~_tJ1I^?Au`U|)HFiR%p!Q@&v*Qs_d!wWYRNN0 zjQD_kc&Cwm=^v}P!&rX0(o}Wvy@?~EZGa!N-J60^_a=7SdqX<^_P>Li z>63+o9W0*YQ)hzaEzGm&PK)7&;!)2aSiV9F;x>@S!vpd~A3&z+yvgpL>&B*s`#=c#5yqHKVu4EL85OKM0IN0X9~wZb@|Jw;sSR|cH_#t|~4Cv_&> zY>PV@d#$ql?0+2bCt!zTq2N%%(v<+UV8&Q+Pra#+QGB|!sM9~if2e(l0LpgNyea<~ zCHO0|fAU#TGJ!xS*aD(qWDko&an@%R#J_d-%@=^p~z2 zjC}q#vuI?pEM4xu>sPdXZ?7}QI$n+N_(5N0MpS9KGld&S_g7N}^N=Eq3`imVrWL}a zCi&s+kx!u4wECYmeO)4(p>h>aX6AsMl~`Ep*8LS@@key#Z=JUPo`GLfqlbVNr#Q6d z9%b!Zbz&VDi!W(fgywaXz|j?aq{)h|f}{|;YH7Z+l)32azAz0aVn!54g4`ASS8*z+ID0zsBL;*RIyFF2ABq zA^%Jl2PZRq8SudN-Lsb&Gyq9yFvGuXqW|HmFpL^?v8%e_TlT;t^89`O1%P^nkJv3u ziMUnkpRw~VGvxGr>-UST!8-q9hHuDFVQjuxi&q*qUA=YPl#4-34Pk5eO`$D`(Ob`` zrk~bNou(TQjQsrdP5x4);+w9f0mD40IEI|@+hiZQhG23NE54wM!xllUp-lyR>?d#( z2>MyPZ-$f|>ITnl8+-)bicpA4hpM+*6ZrRmH%{%wJ#>bpUcDUdow7xgTK%ohd0lg49H%SHBNn}F5p860%3>W&O zPa~ae-7v$7#(c?6_6Uxfb8S*cXYrW8#1Vv>vxy}1P^64){-dmCJ06kTj_0=ojlHKT zo%piW8h_NBb3IYTi@jXwDz)aBu^lO8((--ddty>CwUjdQ zkW9E+=nzB4{WXyH&_zz&Tk z$AIoqkil&=w)JVtio$t+ibV#0h6noE(t)Se+OZOT->--rz?H#Bgt{1U$XYF;2cESMhiEdN>S#%xpCU0 z`SpQHAA3(iXa)hi!J()o$-2O_{PLbLZ~tBCY}e&v&vt;*DH|iq zcP!S)Ak}uxf5(SSEv1H62=~r@!18WzPyPL$)Idu|$`xz5)C#Cd=aU>)QEnsQJ8V&R zQF$osMlIjWyb2yzSV%@)cfd5gLkL7)y`5RWek#ABMATn_%2U(cB{Zt_5(>2TtrvXf zjrce|n2G&hcEi)IZ{mMG>dboLvVts;7LVE-X?D<4j>PD6(<2bur$Q zd+Nsc5-d_%i^#eH1t`Mx!cl#84=jvoxQ=V>mi)26{fGc`L~Yf*y{lJNR-U}ba!Hzk zj#(XJ^B$mXJW7_VCBI|f!Lo=)007*k^0(<{;vJf7?w-NQyYzXT+P9C-S>d9L7JzcS zfF}*OF2Z1PTt|^Xx8gaIf*q$D?k&%(!u`zz2xIt!gkLZBXYqMYQV6kfE zC6f{kRD5(+kC}-N^~oQoJA)sjB98m6=|Ofg-IMtRXQD)a&;wk9n0M;VRY(kOD(@BL zT&J$pdD&o~9g0AMH}S-j_%>brEpoE$JC9PFqy{EMaWlYqkD9PmIMjO}_+O;o&;p1M 
zGJ78-`d0}+=;ATCYMDDan_beFq%&N}gUvu3yuB1-r|H%zhYC8nK1C~>5x74G3Fp+` z){CEL1KuGX6F>$khT;G&GRS@`KfO`s#}Q=#IVK;DNR?}I%kBI@`Fca`QK|l?n6SG3 z@qs3)FQc)sr|r^#ic74#_6Xo}4YHJ+NsrPPCgqAw(#e*oBRnpY^Yw5NUMG#+OfTrV zC@7|GJXT^!e4|V$h^HKPfN>ZyFkZPyg_P12Ifg!bXYHw*^C-~bhz1) zBCTsIz9tVuAO}-R4<*(tWF=Vs?`$u2ZWdKNc?F~P!iwdk`8Xu%f-j&C?^<==>nlw$ zMG7abBKCati!vaJ9#tCji5gF-NCM6g@{5yhS$}13hx6h_5k^yK)cj6SN4jh4VD7cd zSeK&dH&U}IE_PaGl~pNkTXjhl@?8#-C*81}5!CTsLg-oi8D3QkcQ`trRbd297bt@s z=bM{Z`1~kGkyDUmjF+Dln$8IGOF4Z}?;Pw77556pp4@TV1#5;TvZmTp#yGV=*=a8- zm%WB=@*bC1qVK?-$F=39QUb7fpCB@4aZqnrpKfvU@~@QT4pv| zd=^&o%=$}X>=^m`RK_NSSNFr(ru(a!#{vDdonDu^)dk=GgSs{xuAm0uSblKAIn?kfuz~e+&q=$8XWEP zrGWhSS1i4^UPHUxcSz4&uLkOD%Bj`Nt=-zuUI1S(8x#lI^1R=V5NlfpZNX`IR#e44 zFz1JUNb$_m8SR`|Z}+YN&~bF9wvoXd>c;GL%Yu&mKQ*55FW6w?eE~x3$=5a?2xh*_ zzTaLrl5JCWIHZF|BJm~kS-n?CbBSjl3{3kO5dV&h^K;q7*HRqxab$I|tgpsubFUys zds8DU3ktysWiMKt|2f=e9=KzUivV5E9aQjxS=F4`%sfuHY3*^q2TtDAE$jaPouicK zo1GH&3x49#9RQD$c!ROR^LZK?lw`!Mqo$EFgjd4d*CsfBc8i_AJ{ONEr12o+SLaza zAEtznOS&VHiAd11ae})#xWGNesKGyW5$j$iI9PU{MSLIziIe=VpO*gHLT}BhpFx;a zZd|*9I9ePmbPm*(w8<^;K?^HyC9u$O6P{3EtAWnn-5I1#7eVUOeeF8mN+e_9T7gSy z>!n9t_Ng1>1P`Qs7MoLKEtIAUgSnO~E&lHI|2+6VZ+yjU{r@@Di#RzBrRrQ8Bz vj}d5SkuBD|&F&P$|7Dk&zvlnK&7_ZTJzKx6I8_QBjBCH4>E7qN?Jxfq;c;}s diff --git a/docs/source/dev/kernel/paged_attention.rst b/docs/source/dev/kernel/paged_attention.rst index 6fcadeeec27b6..ba4f7a2718158 100644 --- a/docs/source/dev/kernel/paged_attention.rst +++ b/docs/source/dev/kernel/paged_attention.rst @@ -447,7 +447,7 @@ Value a whole block of value tokens. And each ``accs`` in each thread contains 8 elements that accumulated at 8 different head positions. For the thread 0, the ``accs`` variable will have 8 elements, which - are 0th, 16th … 112th elements of a value head that are accumulated + are 0th, 32th … 224th elements of a value head that are accumulated from all assigned 8 tokens. 
From fb96c1e98c05ffa35dd48416f68e88edb2f9eb34 Mon Sep 17 00:00:00 2001
From: Antoni Baum
Date: Fri, 15 Mar 2024 16:37:01 -0700
Subject: [PATCH 115/196] Asynchronous tokenization (#2879)

---
 .buildkite/test-pipeline.yaml               |   2 +-
 tests/async_engine/test_api_server.py       |  16 +-
 tests/conftest.py                           |  11 ++
 tests/lora/test_tokenizer.py                |  69 --------
 tests/lora/test_tokenizer_group.py          |  53 ++++++
 tests/tokenization/__init__.py              |   0
 tests/tokenization/test_cached_tokenizer.py |  20 +++
 .../test_detokenize.py                      |   0
 tests/tokenization/test_tokenizer_group.py  | 100 +++++++++++
 vllm/config.py                              |  57 ++++++
 vllm/engine/arg_utils.py                    |  43 ++++-
 vllm/engine/llm_engine.py                   |  15 +-
 vllm/transformers_utils/tokenizer.py        |  99 ++++-------
 .../tokenizer_group/__init__.py             |  32 ++++
 .../tokenizer_group/base_tokenizer_group.py |  48 +++++
 .../tokenizer_group/ray_tokenizer_group.py  | 166 ++++++++++++++++++
 .../tokenizer_group/tokenizer_group.py      |  80 +++++++++
 17 files changed, 658 insertions(+), 153 deletions(-)
 delete mode 100644 tests/lora/test_tokenizer.py
 create mode 100644 tests/lora/test_tokenizer_group.py
 create mode 100644 tests/tokenization/__init__.py
 create mode 100644 tests/tokenization/test_cached_tokenizer.py
 rename tests/{engine => tokenization}/test_detokenize.py (100%)
 create mode 100644 tests/tokenization/test_tokenizer_group.py
 create mode 100644 vllm/transformers_utils/tokenizer_group/__init__.py
 create mode 100644 vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py
 create mode 100644 vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py
 create mode 100644 vllm/transformers_utils/tokenizer_group/tokenizer_group.py

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 6a130f6fadcc3..8badc16d0cb75 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -28,7 +28,7 @@ steps:
   num_gpus: 2 # only support 1 or 2 for now.

 - label: Engine Test
-  command: pytest -v -s engine test_sequence.py
+  command: pytest -v -s engine tokenization test_sequence.py

 - label: Entrypoints Test
   command: pytest -v -s entrypoints
diff --git a/tests/async_engine/test_api_server.py b/tests/async_engine/test_api_server.py
index ed9017c1e3e9d..248bfbc8ab5c0 100644
--- a/tests/async_engine/test_api_server.py
+++ b/tests/async_engine/test_api_server.py
@@ -25,23 +25,21 @@ def _query_server_long(prompt: str) -> dict:


 @pytest.fixture
-def api_server():
+def api_server(tokenizer_pool_size: int):
     script_path = Path(__file__).parent.joinpath(
         "api_server_async_engine.py").absolute()
     uvicorn_process = subprocess.Popen([
-        sys.executable,
-        "-u",
-        str(script_path),
-        "--model",
-        "facebook/opt-125m",
-        "--host",
-        "127.0.0.1",
+        sys.executable, "-u",
+        str(script_path), "--model", "facebook/opt-125m", "--host",
+        "127.0.0.1", "--tokenizer-pool-size",
+        str(tokenizer_pool_size)
     ])
     yield
     uvicorn_process.terminate()


-def test_api_server(api_server):
+@pytest.mark.parametrize("tokenizer_pool_size", [0, 2])
+def test_api_server(api_server, tokenizer_pool_size: int):
     """
     Run the API server and test it.
diff --git a/tests/conftest.py b/tests/conftest.py index 6eb8159837d51..c06b271e6c7f6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ from vllm import LLM, SamplingParams from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.config import TokenizerPoolConfig _TEST_DIR = os.path.dirname(__file__) _TEST_PROMPTS = [os.path.join(_TEST_DIR, "prompts", "example.txt")] @@ -258,3 +259,13 @@ def generate_beam_search( @pytest.fixture def vllm_runner(): return VllmRunner + + +def get_tokenizer_pool_config(tokenizer_group_type): + if tokenizer_group_type is None: + return None + if tokenizer_group_type == "ray": + return TokenizerPoolConfig(pool_size=1, + pool_type="ray", + extra_config={}) + raise ValueError(f"Unknown tokenizer_group_type: {tokenizer_group_type}") diff --git a/tests/lora/test_tokenizer.py b/tests/lora/test_tokenizer.py deleted file mode 100644 index 6c4c91fce8127..0000000000000 --- a/tests/lora/test_tokenizer.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest -from transformers import AutoTokenizer, PreTrainedTokenizerBase - -from vllm.lora.request import LoRARequest -from vllm.transformers_utils.tokenizer import TokenizerGroup, get_lora_tokenizer - - -@pytest.mark.asyncio -async def test_transformers_tokenizer(): - reference_tokenizer = AutoTokenizer.from_pretrained("gpt2") - tokenizer = TokenizerGroup( - tokenizer_id="gpt2", - enable_lora=False, - max_num_seqs=1, - max_input_length=None, - ) - assert reference_tokenizer.encode("prompt") == tokenizer.encode( - request_id="request_id", prompt="prompt", lora_request=None) - assert reference_tokenizer.encode( - "prompt") == await tokenizer.encode_async(request_id="request_id", - prompt="prompt", - lora_request=None) - assert isinstance(tokenizer.get_lora_tokenizer(None), - PreTrainedTokenizerBase) - assert tokenizer.get_lora_tokenizer( - None) == await tokenizer.get_lora_tokenizer_async(None) - - -@pytest.mark.asyncio -async def test_transformers_tokenizer_lora(sql_lora_files): - reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files) - tokenizer = TokenizerGroup( - tokenizer_id="gpt2", - enable_lora=True, - max_num_seqs=1, - max_input_length=None, - ) - lora_request = LoRARequest("1", 1, sql_lora_files) - assert reference_tokenizer.encode("prompt") == tokenizer.encode( - request_id="request_id", prompt="prompt", lora_request=lora_request) - assert reference_tokenizer.encode( - "prompt") == await tokenizer.encode_async(request_id="request_id", - prompt="prompt", - lora_request=lora_request) - assert isinstance(tokenizer.get_lora_tokenizer(None), - PreTrainedTokenizerBase) - assert tokenizer.get_lora_tokenizer( - None) == await tokenizer.get_lora_tokenizer_async(None) - - assert isinstance(tokenizer.get_lora_tokenizer(lora_request), - PreTrainedTokenizerBase) - assert tokenizer.get_lora_tokenizer( - lora_request) != tokenizer.get_lora_tokenizer(None) - assert tokenizer.get_lora_tokenizer( - lora_request) == await tokenizer.get_lora_tokenizer_async(lora_request) - - -def test_get_lora_tokenizer(sql_lora_files, tmpdir): - lora_request = None - tokenizer = get_lora_tokenizer(lora_request) - assert not tokenizer - - lora_request = LoRARequest("1", 1, sql_lora_files) - tokenizer = get_lora_tokenizer(lora_request) - assert tokenizer.get_added_vocab() - - lora_request = LoRARequest("1", 1, str(tmpdir)) - tokenizer = get_lora_tokenizer(lora_request) - assert not tokenizer diff --git a/tests/lora/test_tokenizer_group.py b/tests/lora/test_tokenizer_group.py new file mode 100644 index 
0000000000000..5fec3f179925a --- /dev/null +++ b/tests/lora/test_tokenizer_group.py @@ -0,0 +1,53 @@ +import pytest +from transformers import AutoTokenizer, PreTrainedTokenizerBase +from vllm.lora.request import LoRARequest +from vllm.transformers_utils.tokenizer_group import get_tokenizer_group +from vllm.transformers_utils.tokenizer import get_lora_tokenizer +from ..conftest import get_tokenizer_pool_config + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tokenizer_group_type", [None, "ray"]) +async def test_tokenizer_group_lora(sql_lora_files, tokenizer_group_type): + reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files) + tokenizer_group = get_tokenizer_group( + get_tokenizer_pool_config(tokenizer_group_type), + tokenizer_id="gpt2", + enable_lora=True, + max_num_seqs=1, + max_input_length=None, + ) + lora_request = LoRARequest("1", 1, sql_lora_files) + assert reference_tokenizer.encode("prompt") == tokenizer_group.encode( + request_id="request_id", prompt="prompt", lora_request=lora_request) + assert reference_tokenizer.encode( + "prompt") == await tokenizer_group.encode_async( + request_id="request_id", + prompt="prompt", + lora_request=lora_request) + assert isinstance(tokenizer_group.get_lora_tokenizer(None), + PreTrainedTokenizerBase) + assert tokenizer_group.get_lora_tokenizer( + None) == await tokenizer_group.get_lora_tokenizer_async(None) + + assert isinstance(tokenizer_group.get_lora_tokenizer(lora_request), + PreTrainedTokenizerBase) + assert tokenizer_group.get_lora_tokenizer( + lora_request) != tokenizer_group.get_lora_tokenizer(None) + assert tokenizer_group.get_lora_tokenizer( + lora_request) == await tokenizer_group.get_lora_tokenizer_async( + lora_request) + + +def test_get_lora_tokenizer(sql_lora_files, tmpdir): + lora_request = None + tokenizer = get_lora_tokenizer(lora_request) + assert not tokenizer + + lora_request = LoRARequest("1", 1, sql_lora_files) + tokenizer = get_lora_tokenizer(lora_request) + assert tokenizer.get_added_vocab() + + lora_request = LoRARequest("1", 1, str(tmpdir)) + tokenizer = get_lora_tokenizer(lora_request) + assert not tokenizer diff --git a/tests/tokenization/__init__.py b/tests/tokenization/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tokenization/test_cached_tokenizer.py b/tests/tokenization/test_cached_tokenizer.py new file mode 100644 index 0000000000000..181e800325128 --- /dev/null +++ b/tests/tokenization/test_cached_tokenizer.py @@ -0,0 +1,20 @@ +from copy import deepcopy +from vllm.transformers_utils.tokenizer import get_cached_tokenizer +from transformers import AutoTokenizer + + +def test_cached_tokenizer(): + reference_tokenizer = AutoTokenizer.from_pretrained("gpt2") + reference_tokenizer.add_special_tokens({"cls_token": ""}) + reference_tokenizer.add_special_tokens( + {"additional_special_tokens": [""]}) + cached_tokenizer = get_cached_tokenizer(deepcopy(reference_tokenizer)) + + assert reference_tokenizer.encode("prompt") == cached_tokenizer.encode( + "prompt") + assert set(reference_tokenizer.all_special_ids) == set( + cached_tokenizer.all_special_ids) + assert set(reference_tokenizer.all_special_tokens) == set( + cached_tokenizer.all_special_tokens) + assert set(reference_tokenizer.all_special_tokens_extended) == set( + cached_tokenizer.all_special_tokens_extended) diff --git a/tests/engine/test_detokenize.py b/tests/tokenization/test_detokenize.py similarity index 100% rename from tests/engine/test_detokenize.py rename to tests/tokenization/test_detokenize.py diff 
--git a/tests/tokenization/test_tokenizer_group.py b/tests/tokenization/test_tokenizer_group.py new file mode 100644 index 0000000000000..d0788ee87563d --- /dev/null +++ b/tests/tokenization/test_tokenizer_group.py @@ -0,0 +1,100 @@ +import os +import pytest +import asyncio +from unittest.mock import patch + +from transformers import AutoTokenizer, PreTrainedTokenizerBase +from vllm.transformers_utils.tokenizer_group import get_tokenizer_group +from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import ( + RayTokenizerGroupPool) +from vllm.transformers_utils.tokenizer_group.tokenizer_group import ( + TokenizerGroup) +from ..conftest import get_tokenizer_pool_config + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tokenizer_group_type", [None, "ray"]) +async def test_tokenizer_group(tokenizer_group_type): + reference_tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer_group = get_tokenizer_group( + get_tokenizer_pool_config(tokenizer_group_type), + tokenizer_id="gpt2", + enable_lora=False, + max_num_seqs=1, + max_input_length=None, + ) + assert reference_tokenizer.encode("prompt") == tokenizer_group.encode( + request_id="request_id", prompt="prompt", lora_request=None) + assert reference_tokenizer.encode( + "prompt") == await tokenizer_group.encode_async( + request_id="request_id", prompt="prompt", lora_request=None) + assert isinstance(tokenizer_group.get_lora_tokenizer(None), + PreTrainedTokenizerBase) + assert tokenizer_group.get_lora_tokenizer( + None) == await tokenizer_group.get_lora_tokenizer_async(None) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tokenizer_group_type", ["ray"]) +async def test_tokenizer_group_pool(tokenizer_group_type): + reference_tokenizer = AutoTokenizer.from_pretrained("gpt2") + tokenizer_group_pool = get_tokenizer_group( + get_tokenizer_pool_config(tokenizer_group_type), + tokenizer_id="gpt2", + enable_lora=False, + max_num_seqs=1, + max_input_length=None, + ) + # Send multiple requests to the tokenizer group pool + # (more than the pool size) + # and check that all requests are processed correctly. 
+ num_requests = tokenizer_group_pool.pool_size * 5 + requests = [ + tokenizer_group_pool.encode_async(request_id=str(i), + prompt=f"prompt {i}", + lora_request=None) + for i in range(num_requests) + ] + results = await asyncio.gather(*requests) + expected_results = [ + reference_tokenizer.encode(f"prompt {i}") for i in range(num_requests) + ] + assert results == expected_results + + +@pytest.mark.asyncio +@pytest.mark.parametrize("tokenizer_group_type", ["ray"]) +async def test_tokenizer_group_ray_pool_env_var_propagation( + tokenizer_group_type): + """Test that env vars from caller process are propagated to + tokenizer Ray actors.""" + env_var = "MY_ENV_VAR" + + class EnvVarCheckerTokenizerGroup(TokenizerGroup): + + def ping(self): + assert os.environ.get(env_var) == "1" + return super().ping() + + class EnvVarCheckerRayTokenizerGroupPool(RayTokenizerGroupPool): + _worker_cls = EnvVarCheckerTokenizerGroup + + tokenizer_pool_config = get_tokenizer_pool_config(tokenizer_group_type) + tokenizer_pool = EnvVarCheckerRayTokenizerGroupPool.from_config( + tokenizer_pool_config, + tokenizer_id="gpt2", + enable_lora=False, + max_num_seqs=1, + max_input_length=None) + with pytest.raises(AssertionError): + tokenizer_pool.ping() + + with patch.dict(os.environ, {env_var: "1"}): + tokenizer_pool_config = get_tokenizer_pool_config(tokenizer_group_type) + tokenizer_pool = EnvVarCheckerRayTokenizerGroupPool.from_config( + tokenizer_pool_config, + tokenizer_id="gpt2", + enable_lora=False, + max_num_seqs=1, + max_input_length=None) + tokenizer_pool.ping() diff --git a/vllm/config.py b/vllm/config.py index de687395a0001..f792e89095246 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -3,6 +3,7 @@ import os from packaging.version import Version +import json import torch from transformers import PretrainedConfig @@ -389,6 +390,58 @@ def verify_with_parallel_config( logger.warning("Possibly too large swap space. " + msg) +@dataclass +class TokenizerPoolConfig: + """Configuration for the tokenizer pool. + + Args: + pool_size: Number of tokenizer workers in the pool. + pool_type: Type of the pool. + extra_config: Additional config for the pool. + The way the config will be used depends on the + pool type. + """ + pool_size: int + pool_type: str + extra_config: dict + + def __post_init__(self): + if self.pool_type not in ("ray", ): + raise ValueError(f"Unknown pool type: {self.pool_type}") + if not isinstance(self.extra_config, dict): + raise ValueError("extra_config must be a dictionary.") + + @classmethod + def create_config( + cls, tokenizer_pool_size: int, tokenizer_pool_type: str, + tokenizer_pool_extra_config: Optional[Union[str, dict]] + ) -> Optional["TokenizerPoolConfig"]: + """Create a TokenizerPoolConfig from the given parameters. + + If tokenizer_pool_size is 0, return None. + + Args: + tokenizer_pool_size: Number of tokenizer workers in the pool. + tokenizer_pool_type: Type of the pool. + tokenizer_pool_extra_config: Additional config for the pool. + The way the config will be used depends on the + pool type. This can be a JSON string (will be parsed). 
+ """ + if tokenizer_pool_size: + if isinstance(tokenizer_pool_extra_config, str): + tokenizer_pool_extra_config_parsed = json.loads( + tokenizer_pool_extra_config) + else: + tokenizer_pool_extra_config_parsed = ( + tokenizer_pool_extra_config or {}) + tokenizer_pool_config = cls(tokenizer_pool_size, + tokenizer_pool_type, + tokenizer_pool_extra_config_parsed) + else: + tokenizer_pool_config = None + return tokenizer_pool_config + + class ParallelConfig: """Configuration for the distributed execution. @@ -403,6 +456,8 @@ class ParallelConfig: parallel and large models. disable_custom_all_reduce: Disable the custom all-reduce kernel and fall back to NCCL. + tokenizer_pool_config: Config for the tokenizer pool. + If None, will use synchronous tokenization. ray_workers_use_nsight: Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler. """ @@ -414,6 +469,7 @@ def __init__( worker_use_ray: bool, max_parallel_loading_workers: Optional[int] = None, disable_custom_all_reduce: bool = False, + tokenizer_pool_config: Optional[TokenizerPoolConfig] = None, ray_workers_use_nsight: bool = False, placement_group: Optional["PlacementGroup"] = None, ) -> None: @@ -430,6 +486,7 @@ def __init__( self.worker_use_ray = worker_use_ray self.max_parallel_loading_workers = max_parallel_loading_workers self.disable_custom_all_reduce = disable_custom_all_reduce + self.tokenizer_pool_config = tokenizer_pool_config self.ray_workers_use_nsight = ray_workers_use_nsight self.placement_group = placement_group diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c3dccdd5bb50b..3e146d2e6c0c4 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -4,7 +4,8 @@ from typing import Optional, Tuple from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, - ParallelConfig, SchedulerConfig, LoRAConfig) + ParallelConfig, SchedulerConfig, LoRAConfig, + TokenizerPoolConfig) @dataclass @@ -40,6 +41,9 @@ class EngineArgs: enforce_eager: bool = False max_context_len_to_capture: int = 8192 disable_custom_all_reduce: bool = False + tokenizer_pool_size: int = 0 + tokenizer_pool_type: str = "ray" + tokenizer_pool_extra_config: Optional[dict] = None enable_lora: bool = False max_loras: int = 1 max_lora_rank: int = 16 @@ -249,6 +253,25 @@ def add_cli_args( action='store_true', default=EngineArgs.disable_custom_all_reduce, help='See ParallelConfig') + parser.add_argument('--tokenizer-pool-size', + type=int, + default=EngineArgs.tokenizer_pool_size, + help='Size of tokenizer pool to use for ' + 'asynchronous tokenization. If 0, will ' + 'use synchronous tokenization.') + parser.add_argument('--tokenizer-pool-type', + type=str, + default=EngineArgs.tokenizer_pool_type, + help='Type of tokenizer pool to use for ' + 'asynchronous tokenization. Ignored ' + 'if tokenizer_pool_size is 0.') + parser.add_argument('--tokenizer-pool-extra-config', + type=str, + default=EngineArgs.tokenizer_pool_extra_config, + help='Extra config for tokenizer pool. ' + 'This should be a JSON string that will be ' + 'parsed into a dictionary. 
Ignored if ' + 'tokenizer_pool_size is 0.') # LoRA related configs parser.add_argument('--enable-lora', action='store_true', @@ -312,14 +335,16 @@ def create_engine_configs( cache_config = CacheConfig(self.block_size, self.gpu_memory_utilization, self.swap_space, self.kv_cache_dtype, - model_config.get_sliding_window(), - self.enable_prefix_caching) - parallel_config = ParallelConfig(self.pipeline_parallel_size, - self.tensor_parallel_size, - self.worker_use_ray, - self.max_parallel_loading_workers, - self.disable_custom_all_reduce, - self.ray_workers_use_nsight) + model_config.get_sliding_window()) + parallel_config = ParallelConfig( + self.pipeline_parallel_size, self.tensor_parallel_size, + self.worker_use_ray, self.max_parallel_loading_workers, + self.disable_custom_all_reduce, + TokenizerPoolConfig.create_config( + self.tokenizer_pool_size, + self.tokenizer_pool_type, + self.tokenizer_pool_extra_config, + ), self.ray_workers_use_nsight) scheduler_config = SchedulerConfig(self.max_num_batched_tokens, self.max_num_seqs, model_config.max_model_len, diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 691c9e83d59cc..71798ab7d17c0 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -17,8 +17,9 @@ from vllm.sampling_params import SamplingParams from vllm.sequence import (Logprob, SamplerOutput, Sequence, SequenceGroup, SequenceGroupOutput, SequenceOutput, SequenceStatus) -from vllm.transformers_utils.tokenizer import (detokenize_incrementally, - TokenizerGroup) +from vllm.transformers_utils.tokenizer import detokenize_incrementally +from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup, + get_tokenizer_group) from vllm.utils import Counter logger = init_logger(__name__) @@ -102,6 +103,10 @@ def __init__( parallel_config, scheduler_config, device_config, lora_config) + # Ping the tokenizer to ensure liveness if it runs in a + # different process. + self.tokenizer.ping() + # Create the scheduler. # NOTE: the cache_config here have been updated with the numbers of # GPU and CPU blocks, which are profiled in the distributed executor. 
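For context, here is a minimal usage sketch of the tokenizer pool options added above. It is not part of the patch: the model name, pool size, and extra config values are illustrative placeholders, and the Ray-backed pool additionally requires `ray` to be installed (otherwise `get_tokenizer_group` raises an ImportError).

    # Illustrative sketch (assumed values, not taken from this patch):
    # enable the Ray-backed tokenizer pool so prompt tokenization runs
    # asynchronously in dedicated Ray actors instead of the engine process.
    from vllm import EngineArgs, LLMEngine

    engine_args = EngineArgs(
        model="gpt2",                    # placeholder model
        tokenizer_pool_size=4,           # 0 (the default) keeps synchronous tokenization
        tokenizer_pool_type="ray",       # the only type TokenizerPoolConfig accepts
        tokenizer_pool_extra_config={"num_cpus": 1},  # forwarded as Ray actor options
    )
    engine = LLMEngine.from_engine_args(engine_args)  # tokenizer.ping() runs during init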
@@ -152,6 +157,7 @@ def get_tokenizer_for_seq(self, def _init_tokenizer(self, **tokenizer_init_kwargs): init_kwargs = dict( + tokenizer_id=self.model_config.tokenizer, enable_lora=bool(self.lora_config), max_num_seqs=self.scheduler_config.max_num_seqs, max_input_length=None, @@ -159,8 +165,9 @@ def _init_tokenizer(self, **tokenizer_init_kwargs): trust_remote_code=self.model_config.trust_remote_code, revision=self.model_config.tokenizer_revision) init_kwargs.update(tokenizer_init_kwargs) - self.tokenizer: TokenizerGroup = TokenizerGroup( - self.model_config.tokenizer, **init_kwargs) + + self.tokenizer: BaseTokenizerGroup = get_tokenizer_group( + self.parallel_config.tokenizer_pool_config, **init_kwargs) def _verify_args(self) -> None: self.model_config.verify_with_parallel_config(self.parallel_config) diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index 2600ea2642da2..f7a1a19a89bcf 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -5,12 +5,48 @@ from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.utils import make_async, LRUCache +from vllm.utils import make_async from vllm.transformers_utils.tokenizers import * logger = init_logger(__name__) +def get_cached_tokenizer( + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast] +) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: + """Get tokenizer with cached properties. + + This will patch the tokenizer object in place. + + By default, transformers will recompute multiple tokenizer properties + each time they are called, leading to a significant slowdown. This + function caches these properties for faster access.""" + + tokenizer_all_special_ids = set(tokenizer.all_special_ids) + tokenizer_all_special_tokens_extended = ( + tokenizer.all_special_tokens_extended) + tokenizer_all_special_tokens = set(tokenizer.all_special_tokens) + + class CachedTokenizer(tokenizer.__class__): + + @property + def all_special_ids(self): + return tokenizer_all_special_ids + + @property + def all_special_tokens(self): + return tokenizer_all_special_tokens + + @property + def all_special_tokens_extended(self): + return tokenizer_all_special_tokens_extended + + CachedTokenizer.__name__ = f"Cached{tokenizer.__class__.__name__}" + + tokenizer.__class__ = CachedTokenizer + return tokenizer + + def get_tokenizer( tokenizer_name: str, *args, @@ -64,7 +100,7 @@ def get_tokenizer( logger.warning( "Using a slow tokenizer. This might cause a significant " "slowdown. 
Consider using a fast tokenizer instead.") - return tokenizer + return get_cached_tokenizer(tokenizer) def get_lora_tokenizer(lora_request: LoRARequest, *args, @@ -88,65 +124,6 @@ def get_lora_tokenizer(lora_request: LoRARequest, *args, get_lora_tokenizer_async = make_async(get_lora_tokenizer) -class TokenizerGroup: - """A group of tokenizers that can be used for LoRA adapters.""" - - def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, - max_input_length: Optional[int], **tokenizer_config): - self.tokenizer_id = tokenizer_id - self.tokenizer_config = tokenizer_config - self.enable_lora = enable_lora - self.max_input_length = max_input_length - self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config) - if enable_lora: - self.lora_tokenizers = LRUCache(capacity=max_num_seqs) - else: - self.lora_tokenizers = None - - def encode(self, - prompt: str, - request_id: Optional[str] = None, - lora_request: Optional[LoRARequest] = None) -> List[int]: - tokenizer = self.get_lora_tokenizer(lora_request) - return tokenizer.encode(prompt) - - async def encode_async( - self, - prompt: str, - request_id: Optional[str] = None, - lora_request: Optional[LoRARequest] = None) -> List[int]: - tokenizer = await self.get_lora_tokenizer_async(lora_request) - return tokenizer.encode(prompt) - - def get_lora_tokenizer( - self, - lora_request: Optional[LoRARequest] = None - ) -> "PreTrainedTokenizer": - if not lora_request or not self.enable_lora: - return self.tokenizer - if lora_request.lora_int_id not in self.lora_tokenizers: - tokenizer = (get_lora_tokenizer( - lora_request, **self.tokenizer_config) or self.tokenizer) - self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) - return tokenizer - else: - return self.lora_tokenizers.get(lora_request.lora_int_id) - - async def get_lora_tokenizer_async( - self, - lora_request: Optional[LoRARequest] = None - ) -> "PreTrainedTokenizer": - if not lora_request or not self.enable_lora: - return self.tokenizer - if lora_request.lora_int_id not in self.lora_tokenizers: - tokenizer = (await get_lora_tokenizer_async( - lora_request, **self.tokenizer_config) or self.tokenizer) - self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) - return tokenizer - else: - return self.lora_tokenizers.get(lora_request.lora_int_id) - - def _convert_tokens_to_string_with_added_encoders( tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], output_tokens: List[str], diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py new file mode 100644 index 0000000000000..adc8d9b90ddb6 --- /dev/null +++ b/vllm/transformers_utils/tokenizer_group/__init__.py @@ -0,0 +1,32 @@ +from typing import Optional +from vllm.config import TokenizerPoolConfig +from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( + BaseTokenizerGroup) +from vllm.transformers_utils.tokenizer_group.tokenizer_group import ( + TokenizerGroup) +from vllm.engine.ray_utils import ray + +if ray: + from vllm.transformers_utils.tokenizer_group.ray_tokenizer_group import ( + RayTokenizerGroupPool) +else: + RayTokenizerGroupPool = None + + +def get_tokenizer_group(tokenizer_pool_config: Optional[TokenizerPoolConfig], + **init_kwargs) -> BaseTokenizerGroup: + if tokenizer_pool_config is None: + return TokenizerGroup(**init_kwargs) + if tokenizer_pool_config.pool_type == "ray": + if RayTokenizerGroupPool is None: + raise ImportError( + "RayTokenizerGroupPool is not available. 
Please install " + "the ray package to use the Ray tokenizer group pool.") + return RayTokenizerGroupPool.from_config(tokenizer_pool_config, + **init_kwargs) + else: + raise ValueError( + f"Unknown pool type: {tokenizer_pool_config.pool_type}") + + +__all__ = ["get_tokenizer_group", "BaseTokenizerGroup"] diff --git a/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py new file mode 100644 index 0000000000000..99518a606fabe --- /dev/null +++ b/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py @@ -0,0 +1,48 @@ +from abc import ABC, abstractmethod +from typing import List, Optional + +from transformers import PreTrainedTokenizer + +from vllm.lora.request import LoRARequest + + +class BaseTokenizerGroup(ABC): + """A group of tokenizers that can be used for LoRA adapters.""" + + @abstractmethod + def ping(self) -> bool: + """Check if the tokenizer group is alive.""" + pass + + @abstractmethod + def get_max_input_len(self, + lora_request: Optional[LoRARequest] = None + ) -> Optional[int]: + """Get the maximum input length for the LoRA request.""" + pass + + @abstractmethod + def encode(self, prompt: str, request_id: Optional[str], + lora_request: Optional[LoRARequest]) -> List[int]: + """Encode a prompt using the tokenizer group.""" + pass + + @abstractmethod + async def encode_async(self, prompt: str, request_id: Optional[str], + lora_request: Optional[LoRARequest]) -> List[int]: + """Encode a prompt using the tokenizer group.""" + pass + + @abstractmethod + def get_lora_tokenizer( + self, + lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + """Get a tokenizer for a LoRA request.""" + pass + + @abstractmethod + async def get_lora_tokenizer_async( + self, + lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + """Get a tokenizer for a LoRA request.""" + pass diff --git a/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py new file mode 100644 index 0000000000000..e048ec05bece7 --- /dev/null +++ b/vllm/transformers_utils/tokenizer_group/ray_tokenizer_group.py @@ -0,0 +1,166 @@ +import asyncio +import os +from typing import List, Optional + +from transformers import PreTrainedTokenizer + +from vllm.config import TokenizerPoolConfig +from vllm.lora.request import LoRARequest +from vllm.engine.ray_utils import ray +from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( + BaseTokenizerGroup) +from vllm.transformers_utils.tokenizer_group.tokenizer_group import ( + TokenizerGroup) +from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy + + +class RayTokenizerGroupPool(BaseTokenizerGroup): + """A Ray-based pool of TokenizerGroups for async tokenization.""" + + # Class to use for workers making up the pool. + _worker_cls = TokenizerGroup + + @classmethod + def from_config(cls, tokenizer_pool_config: TokenizerPoolConfig, + **init_kwargs) -> "RayTokenizerGroupPool": + ray_actor_options = (tokenizer_pool_config.extra_config or { + "num_cpus": 0 + }) + ray_actor_options.setdefault( + "scheduling_strategy", + NodeAffinitySchedulingStrategy( + node_id=ray.get_runtime_context().get_node_id(), soft=True)) + + # Carry over the env vars to the actors. + # This is necessary for API keys and such. 
+ ray_actor_options.setdefault("runtime_env", {}) + _carry_over_env_vars_to_runtime_env(ray_actor_options["runtime_env"]) + + init_kwargs["num_actors"] = tokenizer_pool_config.pool_size + init_kwargs["ray_actor_options"] = ray_actor_options + + return cls(**init_kwargs) + + def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, + max_input_length: Optional[int], num_actors: int, + ray_actor_options: dict, **tokenizer_config): + # Store a local copy of the TokenizerGroup for quick access + # to underlying HF tokenizers. + self._local_tokenizer_group = self._worker_cls( + tokenizer_id=tokenizer_id, + enable_lora=enable_lora, + max_num_seqs=max_num_seqs, + max_input_length=max_input_length, + ) + + ray_tokenizer_group_cls = ray.remote( + self._worker_cls).options(**ray_actor_options) + self.tokenizer_actors = [ + ray_tokenizer_group_cls.remote(tokenizer_id, enable_lora, + max_num_seqs, max_input_length, + **tokenizer_config) + for _ in range(num_actors) + ] + self._idle_actors: Optional[asyncio.Queue] = None + + @property + def pool_size(self) -> int: + return len(self.tokenizer_actors) + + def ping(self): + return ray.get( + [actor.ping.remote() for actor in self.tokenizer_actors]) + + def _ensure_queue_initialized(self): + if self._idle_actors is None: + self._idle_actors = asyncio.Queue() + for actor in self.tokenizer_actors: + self._idle_actors.put_nowait(actor) + + def encode(self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: + """Encode a prompt using the tokenizer group. + + We pick an idle actor and use it to encode the prompt. + The actor is then put back in the queue for future use. + This is blocking. + """ + self._ensure_queue_initialized() + + if self._idle_actors.empty(): + raise RuntimeError("No idle actors available.") + actor = self._idle_actors.get_nowait() + try: + ret = ray.get( + actor.encode.remote(request_id=request_id, + prompt=prompt, + lora_request=lora_request)) + finally: + # Put the actor back in the queue. + # This is done in a finally block to ensure that the actor is + # always put back in the queue, even if an exception/cancellation + # is raised. + self._idle_actors.put_nowait(actor) + return ret + + async def encode_async( + self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: + """Encode a prompt using the tokenizer group. + + We pick an idle actor and use it to encode the prompt. + If there are no idle actors, we wait until one becomes + available. + The actor is then put back in the queue for future use. + This is non-blocking. + """ + self._ensure_queue_initialized() + + actor = await self._idle_actors.get() + try: + ret = await actor.encode.remote(request_id=request_id, + prompt=prompt, + lora_request=lora_request) + finally: + # Put the actor back in the queue. + # This is done in a finally block to ensure that the actor is + # always put back in the queue, even if an exception/cancellation + # is raised. 
+ self._idle_actors.put_nowait(actor) + return ret + + def get_max_input_len(self, + lora_request: Optional[LoRARequest] = None + ) -> Optional[int]: + """Get the maximum input length for the LoRA request.""" + return self._local_tokenizer_group.get_max_input_len(lora_request) + + def get_lora_tokenizer( + self, + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": + return self._local_tokenizer_group.get_lora_tokenizer(lora_request) + + async def get_lora_tokenizer_async( + self, + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": + return await self._local_tokenizer_group.get_lora_tokenizer_async( + lora_request) + + +def _carry_over_env_vars_to_runtime_env(runtime_env: dict) -> None: + """Copy over all current process environment variables to the runtime_env. + + The variables in runtime_env will take precedence over the current process + environment variables. + + runtime_env will be modified in place.""" + env_vars = os.environ.copy() + runtime_env.setdefault("env_vars", {}) + env_vars.update(runtime_env["env_vars"]) + runtime_env["env_vars"] = env_vars diff --git a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py new file mode 100644 index 0000000000000..3af1334cb5ede --- /dev/null +++ b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py @@ -0,0 +1,80 @@ +from typing import List, Optional + +from transformers import PreTrainedTokenizer + +from vllm.lora.request import LoRARequest +from vllm.transformers_utils.tokenizer import (get_lora_tokenizer, + get_lora_tokenizer_async) +from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( + BaseTokenizerGroup) +from vllm.utils import LRUCache +from vllm.transformers_utils.tokenizer import get_tokenizer + + +class TokenizerGroup(BaseTokenizerGroup): + """A group of tokenizers that can be used for LoRA adapters.""" + + def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, + max_input_length: Optional[int], **tokenizer_config): + self.tokenizer_id = tokenizer_id + self.tokenizer_config = tokenizer_config + self.enable_lora = enable_lora + self.max_input_length = max_input_length + self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config) + if enable_lora: + self.lora_tokenizers = LRUCache(capacity=max_num_seqs) + else: + self.lora_tokenizers = None + + def ping(self) -> bool: + """Check if the tokenizer group is alive.""" + return True + + def get_max_input_len(self, + lora_request: Optional[LoRARequest] = None + ) -> Optional[int]: + """Get the maximum input length for the LoRA request.""" + return self.max_input_length + + def encode(self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: + tokenizer = self.get_lora_tokenizer(lora_request) + return tokenizer.encode(prompt) + + async def encode_async( + self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: + tokenizer = await self.get_lora_tokenizer_async(lora_request) + return tokenizer.encode(prompt) + + def get_lora_tokenizer( + self, + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": + if not lora_request or not self.enable_lora: + return self.tokenizer + if lora_request.lora_int_id not in self.lora_tokenizers: + tokenizer = (get_lora_tokenizer( + lora_request, **self.tokenizer_config) or self.tokenizer) + self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) + 
return tokenizer + else: + return self.lora_tokenizers.get(lora_request.lora_int_id) + + async def get_lora_tokenizer_async( + self, + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": + if not lora_request or not self.enable_lora: + return self.tokenizer + if lora_request.lora_int_id not in self.lora_tokenizers: + tokenizer = (await get_lora_tokenizer_async( + lora_request, **self.tokenizer_config) or self.tokenizer) + self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) + return tokenizer + else: + return self.lora_tokenizers.get(lora_request.lora_int_id) From 10585e035ec564cd376146c3fe5ffe427a43c92c Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Fri, 15 Mar 2024 19:35:36 -0500 Subject: [PATCH 116/196] Removed Extraneous Print Message From OAI Server (#3440) --- vllm/entrypoints/openai/serving_completion.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index bfd7c9b50cf32..5f2be878a7b76 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -309,10 +309,7 @@ async def completion_stream_generator( except ValueError as e: # TODO: Use a vllm-specific Validation Error data = self.create_streaming_error_response(str(e)) - print("yield", f"data: {data}\n\n") yield f"data: {data}\n\n" - - print("yield", "data: [DONE]\n\n") yield "data: [DONE]\n\n" def request_output_to_completion_response( From 413366e9a2e66adf9280e7a700c3b0017eab856c Mon Sep 17 00:00:00 2001 From: youkaichao Date: Fri, 15 Mar 2024 18:25:51 -0700 Subject: [PATCH 117/196] [Misc] PR templates (#3413) Co-authored-by: Zhuohan Li --- .github/PULL_REQUEST_TEMPLATE.md | 60 ++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 26 ++------------ 2 files changed, 62 insertions(+), 24 deletions(-) create mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000000..46fda7eeef55e --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,60 @@ +

+<details>
+<summary><b>PR Checklist (Click to expand. Please read before submitting.)</b></summary>
+
+<p>Thank you for your contribution to vLLM! Before submitting the pull request, please ensure the PR meets the following criteria. This helps vLLM maintain the code quality and improve the efficiency of the review process.</p>
+
+<h3>PR Title and Classification</h3>
+<p>Only specific types of PRs will be reviewed. The PR title is prefixed appropriately to indicate the type of change. Please use one of the following:</p>
+<ul>
+    <li><code>[Bugfix]</code> for bug fixes.</li>
+    <li><code>[CI/Build]</code> for build or continuous integration improvements.</li>
+    <li><code>[Doc]</code> for documentation fixes and improvements.</li>
+    <li><code>[Model]</code> for adding a new model or improving an existing model. Model name should appear in the title.</li>
+    <li><code>[Frontend]</code> for changes on the vLLM frontend (e.g., OpenAI API server, <code>LLM</code> class, etc.).</li>
+    <li><code>[Kernel]</code> for changes affecting CUDA kernels or other compute kernels.</li>
+    <li><code>[Core]</code> for changes in the core vLLM logic (e.g., <code>LLMEngine</code>, <code>AsyncLLMEngine</code>, <code>Scheduler</code>, etc.).</li>
+    <li><code>[Hardware][Vendor]</code> for hardware-specific changes. Vendor name should appear in the prefix (e.g., <code>[Hardware][AMD]</code>).</li>
+    <li><code>[Misc]</code> for PRs that do not fit the above categories. Please use this sparingly.</li>
+</ul>
+<p>Note: If the PR spans more than one category, please include all relevant prefixes.</p>
+
+<h3>Code Quality</h3>
+
+<p>The PR needs to meet the following code quality standards:</p>
+
+<ul>
+    <li>We adhere to the <a href="https://google.github.io/styleguide/pyguide.html">Google Python style guide</a> and <a href="https://google.github.io/styleguide/cppguide.html">Google C++ style guide</a>.</li>
+    <li>Pass all linter checks. Please use <code>format.sh</code> to format your code.</li>
+    <li>The code needs to be well-documented to ensure future contributors can easily understand it.</li>
+    <li>Include sufficient tests to ensure the project stays correct and robust. This includes both unit tests and integration tests.</li>
+    <li>Please add documentation to <code>docs/source/</code> if the PR modifies the user-facing behaviors of vLLM. It helps vLLM users understand and utilize the new features or changes.</li>
+</ul>
+
+<h3>Notes for Large Changes</h3>
+<p>Please keep the changes as concise as possible. For major architectural changes (>500 LOC excluding kernel/data/config/test), we would expect a GitHub issue (RFC) discussing the technical design and justification. Otherwise, we will tag it with <code>rfc-required</code> and might not go through the PR.</p>
+
+<h3>What to Expect for the Reviews</h3>
+
+<p>The goal of the vLLM team is to be a transparent reviewing machine. We would like to make the review process transparent and efficient and make sure no contributor feels confused or frustrated. However, the vLLM team is small, so we need to prioritize some PRs over others. Here is what you can expect from the review process:</p>
+
+<ul>
+    <li>After the PR is submitted, the PR will be assigned to a reviewer. Every reviewer will pick up the PRs based on their expertise and availability.</li>
+    <li>After the PR is assigned, the reviewer will provide status updates every 2-3 days. If the PR is not reviewed within 7 days, please feel free to ping the reviewer or the vLLM team.</li>
+    <li>After the review, the reviewer will put an <code>action-required</code> label on the PR if there are changes required. The contributor should address the comments and ping the reviewer to re-review the PR.</li>
+    <li>Please respond to all comments within a reasonable time frame. If a comment isn't clear or you disagree with a suggestion, feel free to ask for clarification or discuss the suggestion.</li>
+</ul>
+
+<h3>Thank You</h3>
+
+<p>Finally, thank you for taking the time to read these guidelines and for your interest in contributing to vLLM. Your contributions make vLLM a great tool for everyone!</p>
+
+</details>
+ +--- + +Please provide a brief explanation of the motivation behind the PR and the changes it introduces. This helps reviewers understand the context and rationale for the contribution. If possible, please link existing issues this PR will resolve. + + diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93a4de73faa89..8db5e569b6aec 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,31 +45,9 @@ pytest tests/ If you encounter a bug or have a feature request, please check our issues page first to see if someone else has already reported it. If not, please file a new issue, providing as much relevant information as possible. -### Coding Style Guide +### Pull Requests & Code Reviews -In general, we adhere to [Google Python style guide](https://google.github.io/styleguide/pyguide.html) and [Google C++ style guide](https://google.github.io/styleguide/cppguide.html). - -We include a formatting script [`format.sh`](./format.sh) to format the code. - -### Pull Requests - -When submitting a pull request: - -1. Make sure your code has been rebased on top of the latest commit on the main branch. -2. Ensure code is properly formatted by running [`format.sh`](./format.sh). -3. Include a detailed description of the changes in the pull request. -Explain why you made the changes you did. -If your pull request fixes an open issue, please include a reference to it in the description. - -### Code Reviews - -All submissions, including submissions by project members, require a code review. -To make the review process as smooth as possible, please: - -1. Keep your changes as concise as possible. -If your pull request involves multiple unrelated changes, consider splitting it into separate pull requests. -2. Respond to all comments within a reasonable time frame. -If a comment isn't clear or you disagree with a suggestion, feel free to ask for clarification or discuss the suggestion. +Please check the PR checklist in the [PR template](.github/PULL_REQUEST_TEMPLATE.md) for detailed guide for contribution. 
### Thank You From 3123f151387d2afa49eaf3130bcee3556f2e87d2 Mon Sep 17 00:00:00 2001 From: Tao He Date: Sat, 16 Mar 2024 11:58:10 +0800 Subject: [PATCH 118/196] Fixes the incorrect argument in the prefix-prefill test cases (#3246) --- tests/kernels/test_prefix_prefill.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index a0be658acac7b..4d051593f40a3 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize("num_heads", NUM_HEADS) -@pytest.mark.parametrize("num_queries_per_kv", NUM_HEADS) +@pytest.mark.parametrize("num_queries_per_kv", NUM_QUERIES_PER_KV) @pytest.mark.parametrize("head_size", HEAD_SIZES) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("device", CUDA_DEVICES) @@ -35,6 +35,13 @@ def test_contexted_kv_attention( if torch.cuda.is_available(): torch.cuda.manual_seed(0) torch.set_default_device(device) + + # Need this, otherwise when we capture the graph the process for GPU 1 would run on both + # GPU0 and GPU1 and things would hang + # + # see also similar issue: https://github.com/Dao-AILab/flash-attention/issues/523 + torch.cuda.set_device(device) + MAX_SEQ_LEN = 1024 MAX_CTX_LEN = 1024 BS = 10 @@ -172,5 +179,5 @@ def test_contexted_kv_attention( torch.cuda.synchronize() end_time = time.time() print(f"xformers Time: {(end_time - start_time)*1000:.2f} ms") - output_ref = output_ref.squeeze(0, 2) + output_ref = output_ref.reshape(output.shape) assert torch.allclose(output_ref, output, atol=1e-6, rtol=0) From 14e3f9a1b2711336ca2e68235eb53bf1b49880c5 Mon Sep 17 00:00:00 2001 From: Ronen Schaffer Date: Sat, 16 Mar 2024 06:01:30 +0200 Subject: [PATCH 119/196] Replace `lstrip()` with `removeprefix()` to fix Ruff linter warning (#2958) --- benchmarks/backend_request_func.py | 15 +++++++++++---- pyproject.toml | 2 -- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index d7cac22ce7a99..51fb8d9e81ebc 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -64,7 +64,7 @@ async def async_request_tgi( output.ttft = ttft output.latency = time.perf_counter() - st - body = data.decode("utf-8").lstrip("data:") + body = remove_prefix(data.decode("utf-8"), "data:") output.generated_text = json.loads(body)["generated_text"] output.success = True else: @@ -158,7 +158,7 @@ async def async_request_trt_llm( output.ttft = ttft output.latency = time.perf_counter() - st - body = data.decode("utf-8").lstrip("data:") + body = remove_prefix(data.decode("utf-8"), "data:") output.generated_text = json.loads(body)["text_output"] output.success = True @@ -255,7 +255,7 @@ async def async_request_openai_completions( if not chunk: continue - chunk = chunk.decode("utf-8").lstrip("data: ") + chunk = remove_prefix(chunk.decode("utf-8"), "data: ") if chunk == "[DONE]": latency = time.perf_counter() - st else: @@ -322,7 +322,7 @@ async def async_request_openai_chat_completions( if not chunk: continue - chunk = chunk.decode("utf-8").lstrip("data: ") + chunk = remove_prefix(chunk.decode("utf-8"), "data: ") if chunk == "[DONE]": latency = time.perf_counter() - st else: @@ -344,6 +344,13 @@ async def async_request_openai_chat_completions( return output +# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix) introduced in Python 3.9 +def remove_prefix(text: str, prefix: str) -> 
str: + if text.startswith(prefix): + return text[len(prefix):] + return text + + ASYNC_REQUEST_FUNCS = { "tgi": async_request_tgi, "vllm": async_request_vllm, diff --git a/pyproject.toml b/pyproject.toml index d6fa5d7a035ff..e0a01215ef997 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,8 +33,6 @@ ignore = [ "F405", "F403", # lambda expression assignment "E731", - # .strip() with multi-character strings - "B005", # Loop control variable not used within loop body "B007", ] From cf6ff18246194c1197ce85028036a462ea9f9269 Mon Sep 17 00:00:00 2001 From: Dinghow Yang Date: Sat, 16 Mar 2024 12:02:12 +0800 Subject: [PATCH 120/196] Fix Baichuan chat template (#3340) --- examples/template_baichuan.jinja | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/examples/template_baichuan.jinja b/examples/template_baichuan.jinja index a1812a6c09ab1..42a8d9270a4c6 100644 --- a/examples/template_baichuan.jinja +++ b/examples/template_baichuan.jinja @@ -1,22 +1,13 @@ {{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }} -{% for message in messages %} -{% if message['role'] == 'user' %} - -{{ message['content']|trim -}} -{% if not loop.last %} - - -{% endif %} -{% elif message['role'] == 'assistant' %} - -{{ message['content']|trim -}} -{% if not loop.last %} - - -{% endif %} -{% endif %} -{% endfor %} -{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %} - +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {{- '' + message['content'] -}} + {%- elif message['role'] == 'assistant' -%} + {{- '' + message['content'] -}} + {%- endif -%} +{%- endfor -%} + +{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%} + {{- '' -}} {% endif %} \ No newline at end of file From ad50bf4b25ba4344a560a7919fdc6ddb57c3d808 Mon Sep 17 00:00:00 2001 From: simon-mo Date: Fri, 15 Mar 2024 22:23:38 -0700 Subject: [PATCH 121/196] fix lint --- .github/workflows/ruff.yml | 2 +- tests/kernels/test_prefix_prefill.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index 8f8f5ee3cc70c..a3fc3b2fa647e 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -31,4 +31,4 @@ jobs: ruff vllm tests - name: Spelling check with codespell run: | - codespell --toml pyproject.toml \ No newline at end of file + codespell --toml pyproject.toml \ No newline at end of file diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index 4d051593f40a3..2b35335a9c92b 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -36,8 +36,8 @@ def test_contexted_kv_attention( torch.cuda.manual_seed(0) torch.set_default_device(device) - # Need this, otherwise when we capture the graph the process for GPU 1 would run on both - # GPU0 and GPU1 and things would hang + # Need this, otherwise when we capture the graph the process for GPU 1 would + # run on both GPU0 and GPU1 and things would hang # # see also similar issue: https://github.com/Dao-AILab/flash-attention/issues/523 torch.cuda.set_device(device) From 8e67598aa6ea6ce37c4c8cb470412db0ea523573 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Sat, 16 Mar 2024 00:36:29 -0700 Subject: [PATCH 122/196] [Misc] fix line length for entire codebase (#3444) --- .github/workflows/ruff.yml | 2 +- benchmarks/backend_request_func.py | 8 +- 
benchmarks/benchmark_prefix_caching.py | 2 +- benchmarks/benchmark_serving.py | 6 +- collect_env.py | 181 +++++++++++++--------- csrc/punica/bgmv/generator.py | 2 +- examples/multilora_inference.py | 69 +++++---- examples/offline_inference_with_prefix.py | 7 +- setup.py | 25 +-- 9 files changed, 174 insertions(+), 128 deletions(-) diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index a3fc3b2fa647e..cd16cecf21546 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -28,7 +28,7 @@ jobs: pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1 - name: Analysing the code with ruff run: | - ruff vllm tests + ruff . - name: Spelling check with codespell run: | codespell --toml pyproject.toml \ No newline at end of file diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py index 51fb8d9e81ebc..7e6f3c3ed4b6d 100644 --- a/benchmarks/backend_request_func.py +++ b/benchmarks/backend_request_func.py @@ -110,7 +110,7 @@ async def async_request_vllm( output.ttft = ttft output.latency = time.perf_counter() - st - # When streaming, '\0' is appended to the end of the response. + # When streaming, '\0' is appended to the end of response. body = data.decode("utf-8").strip("\0") output.generated_text = json.loads( body)["text"][0][len(request_func_input.prompt):] @@ -192,7 +192,8 @@ async def async_request_deepspeed_mii( output = RequestFuncOutput() output.prompt_len = request_func_input.prompt_len - # DeepSpeed-MII doesn't support streaming as of Jan 28 2024, will use 0 as placeholder. + # DeepSpeed-MII doesn't support streaming as of Jan 28 2024, + # will use 0 as placeholder. # https://github.com/microsoft/DeepSpeed-MII/pull/311 output.ttft = 0 @@ -344,7 +345,8 @@ async def async_request_openai_chat_completions( return output -# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix) introduced in Python 3.9 +# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix) +# introduced in Python 3.9 def remove_prefix(text: str, prefix: str) -> str: if text.startswith(prefix): return text[len(prefix):] diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index a0307439cd5f1..546c61e847839 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -4,7 +4,7 @@ from vllm import LLM from vllm import SamplingParams -PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. 
Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" +PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" # noqa: E501 def test_prefix(llm=None, sampling_params=None, prompts=None): diff --git a/benchmarks/benchmark_serving.py b/benchmarks/benchmark_serving.py index 3f5e2d9c8f4dc..9404608b5554b 100644 --- a/benchmarks/benchmark_serving.py +++ b/benchmarks/benchmark_serving.py @@ -293,7 +293,9 @@ def main(args: argparse.Namespace): # Save to file base_model_id = model_id.split("/")[-1] - file_name = f"{backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json" + file_name = ( + f"{backend}-{args.request_rate}qps-{base_model_id}-{current_dt}.json" + ) with open(file_name, "w") as outfile: json.dump(result_json, outfile) @@ -341,7 +343,7 @@ def main(args: argparse.Namespace): "--tokenizer", type=str, help= - "Name or path of the tokenizer, if not using the default model tokenizer.", + "Name or path of the tokenizer, if not using the default tokenizer.", ) parser.add_argument( "--best-of", diff --git a/collect_env.py b/collect_env.py index a886db693e2f1..edcbfe73b38d0 100644 --- a/collect_env.py +++ b/collect_env.py @@ -1,3 +1,4 @@ +# ruff: noqa # code borrowed from https://github.com/pytorch/pytorch/blob/main/torch/utils/collect_env.py # Unlike the rest of the PyTorch this file must be python2 compliant. 
@@ -11,7 +12,6 @@ import os from collections import namedtuple - try: import torch TORCH_AVAILABLE = True @@ -19,38 +19,40 @@ TORCH_AVAILABLE = False # System Environment Information -SystemEnv = namedtuple('SystemEnv', [ - 'torch_version', - 'is_debug_build', - 'cuda_compiled_version', - 'gcc_version', - 'clang_version', - 'cmake_version', - 'os', - 'libc_version', - 'python_version', - 'python_platform', - 'is_cuda_available', - 'cuda_runtime_version', - 'cuda_module_loading', - 'nvidia_driver_version', - 'nvidia_gpu_models', - 'cudnn_version', - 'pip_version', # 'pip' or 'pip3' - 'pip_packages', - 'conda_packages', - 'hip_compiled_version', - 'hip_runtime_version', - 'miopen_runtime_version', - 'caching_allocator_config', - 'is_xnnpack_available', - 'cpu_info', - 'rocm_version', # vllm specific field - 'neuron_sdk_version', # vllm specific field - 'vllm_version', # vllm specific field - 'vllm_build_flags', # vllm specific field - 'gpu_topo', # vllm specific field -]) +SystemEnv = namedtuple( + 'SystemEnv', + [ + 'torch_version', + 'is_debug_build', + 'cuda_compiled_version', + 'gcc_version', + 'clang_version', + 'cmake_version', + 'os', + 'libc_version', + 'python_version', + 'python_platform', + 'is_cuda_available', + 'cuda_runtime_version', + 'cuda_module_loading', + 'nvidia_driver_version', + 'nvidia_gpu_models', + 'cudnn_version', + 'pip_version', # 'pip' or 'pip3' + 'pip_packages', + 'conda_packages', + 'hip_compiled_version', + 'hip_runtime_version', + 'miopen_runtime_version', + 'caching_allocator_config', + 'is_xnnpack_available', + 'cpu_info', + 'rocm_version', # vllm specific field + 'neuron_sdk_version', # vllm specific field + 'vllm_version', # vllm specific field + 'vllm_build_flags', # vllm specific field + 'gpu_topo', # vllm specific field + ]) DEFAULT_CONDA_PATTERNS = { "torch", @@ -77,8 +79,10 @@ def run(command): """Return (return-code, stdout, stderr).""" shell = True if type(command) is str else False - p = subprocess.Popen(command, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, shell=shell) + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell) raw_output, raw_err = p.communicate() rc = p.returncode if get_platform() == 'win32': @@ -108,6 +112,7 @@ def run_and_parse_first_match(run_lambda, command, regex): return None return match.group(1) + def run_and_return_first_line(run_lambda, command): """Run command using run_lambda and returns first line if output is not empty.""" rc, out, _ = run_lambda(command) @@ -124,22 +129,23 @@ def get_conda_packages(run_lambda, patterns=None): if out is None: return out - return "\n".join( - line - for line in out.splitlines() - if not line.startswith("#") - and any(name in line for name in patterns) - ) + return "\n".join(line for line in out.splitlines() + if not line.startswith("#") and any(name in line + for name in patterns)) + def get_gcc_version(run_lambda): return run_and_parse_first_match(run_lambda, 'gcc --version', r'gcc (.*)') + def get_clang_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'clang --version', r'clang version (.*)') + return run_and_parse_first_match(run_lambda, 'clang --version', + r'clang version (.*)') def get_cmake_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'cmake --version', r'cmake (.*)') + return run_and_parse_first_match(run_lambda, 'cmake --version', + r'cmake (.*)') def get_nvidia_driver_version(run_lambda): @@ -148,11 +154,13 @@ def get_nvidia_driver_version(run_lambda): return 
run_and_parse_first_match(run_lambda, cmd, r'com[.]nvidia[.]CUDA [(](.*?)[)]') smi = get_nvidia_smi() - return run_and_parse_first_match(run_lambda, smi, r'Driver Version: (.*?) ') + return run_and_parse_first_match(run_lambda, smi, + r'Driver Version: (.*?) ') def get_gpu_info(run_lambda): - if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr(torch.version, 'hip') and torch.version.hip is not None): + if get_platform() == 'darwin' or (TORCH_AVAILABLE and hasattr( + torch.version, 'hip') and torch.version.hip is not None): if TORCH_AVAILABLE and torch.cuda.is_available(): if torch.version.hip is not None: prop = torch.cuda.get_device_properties(0) @@ -174,7 +182,8 @@ def get_gpu_info(run_lambda): def get_running_cuda_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'nvcc --version', r'release .+ V(.*)') + return run_and_parse_first_match(run_lambda, 'nvcc --version', + r'release .+ V(.*)') def get_cudnn_version(run_lambda): @@ -219,8 +228,10 @@ def get_nvidia_smi(): smi = 'nvidia-smi' if get_platform() == 'win32': system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') - program_files_root = os.environ.get('PROGRAMFILES', 'C:\\Program Files') - legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', 'NVSMI', smi) + program_files_root = os.environ.get('PROGRAMFILES', + 'C:\\Program Files') + legacy_path = os.path.join(program_files_root, 'NVIDIA Corporation', + 'NVSMI', smi) new_path = os.path.join(system_root, 'System32', smi) smis = [new_path, legacy_path] for candidate_smi in smis: @@ -232,7 +243,8 @@ def get_nvidia_smi(): def get_rocm_version(run_lambda): """Returns the ROCm version if available, otherwise 'N/A'.""" - return run_and_parse_first_match(run_lambda, 'hipcc --version', r'HIP version: (\S+)') + return run_and_parse_first_match(run_lambda, 'hipcc --version', + r'HIP version: (\S+)') def get_neuron_sdk_version(run_lambda): @@ -342,13 +354,16 @@ def get_gpu_topo(run_lambda): # ProcessorType=3 # Revision=27142 + def get_cpu_info(run_lambda): rc, out, err = 0, '', '' if get_platform() == 'linux': rc, out, err = run_lambda('lscpu') elif get_platform() == 'win32': - rc, out, err = run_lambda('wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ - CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE') + rc, out, err = run_lambda( + 'wmic cpu get Name,Manufacturer,Family,Architecture,ProcessorType,DeviceID, \ + CurrentClockSpeed,MaxClockSpeed,L2CacheSize,L2CacheSpeed,Revision /VALUE' + ) elif get_platform() == 'darwin': rc, out, err = run_lambda("sysctl -n machdep.cpu.brand_string") cpu_info = 'None' @@ -373,18 +388,22 @@ def get_platform(): def get_mac_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', r'(.*)') + return run_and_parse_first_match(run_lambda, 'sw_vers -productVersion', + r'(.*)') def get_windows_version(run_lambda): system_root = os.environ.get('SYSTEMROOT', 'C:\\Windows') wmic_cmd = os.path.join(system_root, 'System32', 'Wbem', 'wmic') findstr_cmd = os.path.join(system_root, 'System32', 'findstr') - return run_and_read_all(run_lambda, '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) + return run_and_read_all( + run_lambda, + '{} os get Caption | {} /v Caption'.format(wmic_cmd, findstr_cmd)) def get_lsb_version(run_lambda): - return run_and_parse_first_match(run_lambda, 'lsb_release -a', r'Description:\t(.*)') + return run_and_parse_first_match(run_lambda, 'lsb_release -a', + r'Description:\t(.*)') def 
check_release_file(run_lambda): @@ -443,11 +462,8 @@ def get_pip_packages(run_lambda, patterns=None): # But here it is invoked as `python -mpip` def run_with_pip(pip): out = run_and_read_all(run_lambda, pip + ["list", "--format=freeze"]) - return "\n".join( - line - for line in out.splitlines() - if any(name in line for name in patterns) - ) + return "\n".join(line for line in out.splitlines() + if any(name in line for name in patterns)) pip_version = 'pip3' if sys.version[0] == '3' else 'pip' out = run_with_pip([sys.executable, '-mpip']) @@ -472,10 +488,12 @@ def get_cuda_module_loading_config(): def is_xnnpack_available(): if TORCH_AVAILABLE: import torch.backends.xnnpack - return str(torch.backends.xnnpack.enabled) # type: ignore[attr-defined] + return str( + torch.backends.xnnpack.enabled) # type: ignore[attr-defined] else: return "N/A" + def get_env_info(): run_lambda = run pip_version, pip_list_output = get_pip_packages(run_lambda) @@ -485,9 +503,11 @@ def get_env_info(): debug_mode_str = str(torch.version.debug) cuda_available_str = str(torch.cuda.is_available()) cuda_version_str = torch.version.cuda - if not hasattr(torch.version, 'hip') or torch.version.hip is None: # cuda version + if not hasattr(torch.version, + 'hip') or torch.version.hip is None: # cuda version hip_compiled_version = hip_runtime_version = miopen_runtime_version = 'N/A' else: # HIP version + def get_version_or_na(cfg, prefix): _lst = [s.rsplit(None, 1)[-1] for s in cfg if prefix in s] return _lst[0] if _lst else 'N/A' @@ -514,7 +534,9 @@ def get_version_or_na(cfg, prefix): return SystemEnv( torch_version=version_str, is_debug_build=debug_mode_str, - python_version='{} ({}-bit runtime)'.format(sys_version, sys.maxsize.bit_length() + 1), + python_version='{} ({}-bit runtime)'.format( + sys_version, + sys.maxsize.bit_length() + 1), python_platform=get_python_platform(), is_cuda_available=cuda_available_str, cuda_compiled_version=cuda_version_str, @@ -544,6 +566,7 @@ def get_version_or_na(cfg, prefix): gpu_topo=gpu_topo, ) + env_info_fmt = """ PyTorch version: {torch_version} Is debug build: {is_debug_build} @@ -588,6 +611,7 @@ def get_version_or_na(cfg, prefix): def pretty_str(envinfo): + def replace_nones(dct, replacement='Could not collect'): for key in dct.keys(): if dct[key] is not None: @@ -632,9 +656,10 @@ def maybe_start_on_next_line(string): 'nvidia_driver_version', ] all_cuda_fields = dynamic_cuda_fields + ['cudnn_version'] - all_dynamic_cuda_fields_missing = all( - mutable_dict[field] is None for field in dynamic_cuda_fields) - if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: + all_dynamic_cuda_fields_missing = all(mutable_dict[field] is None + for field in dynamic_cuda_fields) + if TORCH_AVAILABLE and not torch.cuda.is_available( + ) and all_dynamic_cuda_fields_missing: for field in all_cuda_fields: mutable_dict[field] = 'No CUDA' if envinfo.cuda_compiled_version is None: @@ -647,17 +672,19 @@ def maybe_start_on_next_line(string): mutable_dict = replace_nones(mutable_dict) # If either of these are '', replace with 'No relevant packages' - mutable_dict['pip_packages'] = replace_if_empty(mutable_dict['pip_packages']) - mutable_dict['conda_packages'] = replace_if_empty(mutable_dict['conda_packages']) + mutable_dict['pip_packages'] = replace_if_empty( + mutable_dict['pip_packages']) + mutable_dict['conda_packages'] = replace_if_empty( + mutable_dict['conda_packages']) # Tag conda and pip packages with a prefix # If they were previously None, they'll show up as ie 
'[conda] Could not collect' if mutable_dict['pip_packages']: - mutable_dict['pip_packages'] = prepend(mutable_dict['pip_packages'], - '[{}] '.format(envinfo.pip_version)) + mutable_dict['pip_packages'] = prepend( + mutable_dict['pip_packages'], '[{}] '.format(envinfo.pip_version)) if mutable_dict['conda_packages']: - mutable_dict['conda_packages'] = prepend(mutable_dict['conda_packages'], - '[conda] ') + mutable_dict['conda_packages'] = prepend( + mutable_dict['conda_packages'], '[conda] ') mutable_dict['cpu_info'] = envinfo.cpu_info return env_info_fmt.format(**mutable_dict) @@ -671,18 +698,22 @@ def main(): output = get_pretty_env_info() print(output) - if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr(torch.utils, '_crash_handler'): + if TORCH_AVAILABLE and hasattr(torch, 'utils') and hasattr( + torch.utils, '_crash_handler'): minidump_dir = torch.utils._crash_handler.DEFAULT_MINIDUMP_DIR if sys.platform == "linux" and os.path.exists(minidump_dir): - dumps = [os.path.join(minidump_dir, dump) for dump in os.listdir(minidump_dir)] + dumps = [ + os.path.join(minidump_dir, dump) + for dump in os.listdir(minidump_dir) + ] latest = max(dumps, key=os.path.getctime) ctime = os.path.getctime(latest) - creation_time = datetime.datetime.fromtimestamp(ctime).strftime('%Y-%m-%d %H:%M:%S') + creation_time = datetime.datetime.fromtimestamp(ctime).strftime( + '%Y-%m-%d %H:%M:%S') msg = "\n*** Detected a minidump at {} created on {}, ".format(latest, creation_time) + \ "if this is related to your bug please include it when you file a report ***" print(msg, file=sys.stderr) - if __name__ == '__main__': main() diff --git a/csrc/punica/bgmv/generator.py b/csrc/punica/bgmv/generator.py index 66de56d74f3e7..c347d4f2ab9f4 100644 --- a/csrc/punica/bgmv/generator.py +++ b/csrc/punica/bgmv/generator.py @@ -10,7 +10,7 @@ #include "bgmv_impl.cuh" FOR_BGMV_WIDE_NARROW(INST_BGMV_TWOSIDE, {input_dtype}, {output_dtype}, {weight_dtype}) -""".lstrip() +""".lstrip() # noqa: E501 for input_dtype in DTYPES: for output_dtype in DTYPES: diff --git a/examples/multilora_inference.py b/examples/multilora_inference.py index cd4451481ca83..9f28e16cf667a 100644 --- a/examples/multilora_inference.py +++ b/examples/multilora_inference.py @@ -1,5 +1,6 @@ """ -This example shows how to use the multi-LoRA functionality for offline inference. +This example shows how to use the multi-LoRA functionality +for offline inference. Requires HuggingFace credentials for access to Llama2. """ @@ -16,7 +17,7 @@ def create_test_prompts( lora_path: str ) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]: """Create a list of test prompts with their sampling parameters. - + 2 requests for base model, 4 requests for the LoRA. We define 2 different LoRA adapters (using the same model for demo purposes). 
Since we also set `max_loras=1`, the expectation is that the requests @@ -34,36 +35,40 @@ def create_test_prompts( top_k=5, presence_penalty=0.2, max_tokens=128), None), - ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", - SamplingParams(temperature=0.0, - logprobs=1, - prompt_logprobs=1, - max_tokens=128, - stop_token_ids=[32003]), - LoRARequest("sql-lora", 1, lora_path)), - ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", - SamplingParams(n=3, - best_of=3, - use_beam_search=True, - temperature=0, - max_tokens=128, - stop_token_ids=[32003]), - LoRARequest("sql-lora", 1, lora_path)), - ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", - SamplingParams(temperature=0.0, - logprobs=1, - prompt_logprobs=1, - max_tokens=128, - stop_token_ids=[32003]), - LoRARequest("sql-lora2", 2, lora_path)), - ("[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", - SamplingParams(n=3, - best_of=3, - use_beam_search=True, - temperature=0, - max_tokens=128, - stop_token_ids=[32003]), - LoRARequest("sql-lora", 1, lora_path)), + ( + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", # noqa: E501 + SamplingParams(temperature=0.0, + logprobs=1, + prompt_logprobs=1, + max_tokens=128, + stop_token_ids=[32003]), + LoRARequest("sql-lora", 1, lora_path)), + ( + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? [/user] [assistant]", # noqa: E501 + SamplingParams(n=3, + best_of=3, + use_beam_search=True, + temperature=0, + max_tokens=128, + stop_token_ids=[32003]), + LoRARequest("sql-lora", 1, lora_path)), + ( + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_74 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]", # noqa: E501 + SamplingParams(temperature=0.0, + logprobs=1, + prompt_logprobs=1, + max_tokens=128, + stop_token_ids=[32003]), + LoRARequest("sql-lora2", 2, lora_path)), + ( + "[user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? 
[/user] [assistant]", # noqa: E501 + SamplingParams(n=3, + best_of=3, + use_beam_search=True, + temperature=0, + max_tokens=128, + stop_token_ids=[32003]), + LoRARequest("sql-lora", 1, lora_path)), ] diff --git a/examples/offline_inference_with_prefix.py b/examples/offline_inference_with_prefix.py index 1aa718b88907c..fbfb384fd4282 100644 --- a/examples/offline_inference_with_prefix.py +++ b/examples/offline_inference_with_prefix.py @@ -37,9 +37,10 @@ print("-" * 80) -# The llm.generate call will batch all prompts and send the batch at once if resources allow. -# The prefix will only be cached after the first batch is processed, so we need to call generate once -# to calculate the prefix and cache it. +# The llm.generate call will batch all prompts and send the batch at once +# if resources allow. The prefix will only be cached after the first batch +# is processed, so we need to call generate once to calculate the prefix +# and cache it. outputs = llm.generate(generating_prompts[0], sampling_params) # Subsequent batches can leverage the cached prefix diff --git a/setup.py b/setup.py index 4e2bb2ce851f8..a7307949e9418 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,12 @@ import sys import torch import torch.utils.cpp_extension as torch_cpp_ext -from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME, ROCM_HOME +from torch.utils.cpp_extension import ( + BuildExtension, + CUDAExtension, + CUDA_HOME, + ROCM_HOME, +) ROOT_DIR = os.path.dirname(__file__) @@ -57,9 +62,8 @@ def _is_cuda() -> bool: if _is_hip(): if ROCM_HOME is None: - raise RuntimeError( - "Cannot find ROCM_HOME. ROCm must be available to build the package." - ) + raise RuntimeError("Cannot find ROCM_HOME. " + "ROCm must be available to build the package.") NVCC_FLAGS += ["-DUSE_ROCM"] NVCC_FLAGS += ["-U__HIP_NO_HALF_CONVERSIONS__"] NVCC_FLAGS += ["-U__HIP_NO_HALF_OPERATORS__"] @@ -144,7 +148,8 @@ def get_pytorch_rocm_arch() -> Set[str]: """ env_arch_list = os.environ.get("PYTORCH_ROCM_ARCH", None) - # If we don't have PYTORCH_ROCM_ARCH specified pull the list from rocm_agent_enumerator + # If we don't have PYTORCH_ROCM_ARCH specified pull the list from + # rocm_agent_enumerator if env_arch_list is None: command = "rocm_agent_enumerator" env_arch_list = (subprocess.check_output( @@ -255,11 +260,11 @@ def get_torch_arch_list() -> Set[str]: "CUDA 11.1 or higher is required for compute capability 8.6.") if nvcc_cuda_version < Version("11.8"): if any(cc.startswith("8.9") for cc in compute_capabilities): - # CUDA 11.8 is required to generate the code targeting compute capability 8.9. - # However, GPUs with compute capability 8.9 can also run the code generated by - # the previous versions of CUDA 11 and targeting compute capability 8.0. - # Therefore, if CUDA 11.8 is not available, we target compute capability 8.0 - # instead of 8.9. + # CUDA 11.8 is required to generate the code targeting compute + # capability 8.9. However, GPUs with compute capability 8.9 can + # also run the code generated by the previous versions of CUDA 11 + # and targeting compute capability 8.0. Therefore, if CUDA 11.8 + # is not available, we target compute capability 8.0 instead of 8.9. warnings.warn( "CUDA 11.8 or higher is required for compute capability 8.9. 
" "Targeting compute capability 8.0 instead.", From 120157fd2a256faf9e4d9941aa580c195735b878 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Sat, 16 Mar 2024 13:35:27 -0700 Subject: [PATCH 123/196] Support arbitrary json_object in OpenAI and Context Free Grammar (#3211) --- tests/entrypoints/test_openai_server.py | 50 ++++++++ vllm/entrypoints/openai/protocol.py | 9 ++ vllm/model_executor/guided_decoding.py | 54 +++++++-- .../guided_logits_processors.py | 112 ++++++++++++------ 4 files changed, 176 insertions(+), 49 deletions(-) diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index a5b2bf4c0f0c9..86d9a85af80b1 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -660,5 +660,55 @@ async def test_guided_decoding_type_error(server, client: openai.AsyncOpenAI): extra_body=dict(guided_regex=TEST_REGEX, guided_json=TEST_SCHEMA)) +async def test_response_format_json_object(server, client: openai.AsyncOpenAI): + resp = await client.chat.completions.create( + model=MODEL_NAME, + messages=[{ + "role": + "user", + "content": ('what is 1+1? please respond with a JSON object, ' + 'the format is {"result": 2}') + }], + response_format={"type": "json_object"}) + + content = resp.choices[0].message.content + loaded = json.loads(content) + assert loaded == {"result": 2}, loaded + + +async def test_guided_grammar(server, client: openai.AsyncOpenAI): + simple_sql_grammar = """ +start: select_statement + +select_statement: "SELECT" column "from" table "where" condition + +column: "col_1" | "col_2" +table: "table_1" | "table_2" +condition: column "=" number + +number: "1" | "2" +""" + + completion = await client.completions.create( + model=MODEL_NAME, + prompt=("Generate a sql state that select col_1 from " + "table_1 where it is equals to 1"), + temperature=1.0, + max_tokens=500, + extra_body=dict(guided_grammar=simple_sql_grammar)) + + content = completion.choices[0].text + + # use Lark to parse the output, and make sure it's a valid parse tree + from lark import Lark + parser = Lark(simple_sql_grammar) + parser.parse(content) + + # remove spaces for comparison b/c we removed them in the grammar + ground_truth = "SELECT col_1 from table_1 where col_1 = 1".replace(" ", "") + + assert content.strip() == ground_truth + + if __name__ == "__main__": pytest.main([__file__]) diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 26499b8d7a66f..9421880411611 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -55,6 +55,11 @@ class UsageInfo(BaseModel): completion_tokens: Optional[int] = 0 +class ResponseFormat(BaseModel): + # type must be "json_object" or "text" + type: str = Literal["text", "json_object"] + + class ChatCompletionRequest(BaseModel): model: str messages: List[Dict[str, str]] @@ -89,6 +94,8 @@ class ChatCompletionRequest(BaseModel): guided_json: Optional[Union[str, dict, BaseModel]] = None guided_regex: Optional[str] = None guided_choice: Optional[List[str]] = None + guided_grammar: Optional[str] = None + response_format: Optional[ResponseFormat] = None def to_sampling_params(self) -> SamplingParams: if self.logprobs and not self.top_logprobs: @@ -183,6 +190,8 @@ class CompletionRequest(BaseModel): guided_json: Optional[Union[str, dict, BaseModel]] = None guided_regex: Optional[str] = None guided_choice: Optional[List[str]] = None + guided_grammar: Optional[str] = None + response_format: Optional[ResponseFormat] = None def 
to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 diff --git a/vllm/model_executor/guided_decoding.py b/vllm/model_executor/guided_decoding.py index 00984460d79a6..bd09cf9cb6ee3 100644 --- a/vllm/model_executor/guided_decoding.py +++ b/vllm/model_executor/guided_decoding.py @@ -6,19 +6,50 @@ from json import dumps as json_dumps from re import escape as regex_escape from typing import Union, Tuple + from pydantic import BaseModel +from transformers import PreTrainedTokenizerBase from vllm.entrypoints.openai.protocol import (CompletionRequest, ChatCompletionRequest) from vllm.model_executor.guided_logits_processors import (JSONLogitsProcessor, - RegexLogitsProcessor) + RegexLogitsProcessor, + CFGLogitsProcessor) class GuidedDecodingMode(Enum): JSON = "json" REGEX = "regex" CHOICE = "choice" + GRAMMAR = "grammar" + + +# https://github.com/outlines-dev/outlines/blob/main/outlines/grammars/json.lark +# the main difference is that we changed the start: value to +# start: object | array, so we are denying scalar values as the root of the +# JSON. Starting with scalars as the root seems to cause llama to generate +# without stop. +JSON_GRAMMAR = r""" +?start: object | array + +?value: object +| array +| UNESCAPED_STRING +| SIGNED_NUMBER -> number +| "true" -> true +| "false" -> false +| "null" -> null + +array : "[" [value ("," value)*] "]" +object : "{" [pair ("," pair)*] "}" +pair : UNESCAPED_STRING ":" value + +%import common.UNESCAPED_STRING +%import common.SIGNED_NUMBER +%import common.WS +%ignore WS +""" global_thread_pool = None # used for generating logits processor fsm @@ -57,9 +88,6 @@ def _get_guide_and_mode( ) -> Tuple[str, GuidedDecodingMode]: if request.guided_json: - if not isinstance(request.guided_json, (str, dict, BaseModel)): - raise TypeError("JSON schema must be str, dict, or BaseModel") - json = request.guided_json if isinstance(json, dict): # turn dict into hashable string @@ -69,33 +97,33 @@ def _get_guide_and_mode( # with the same fields will get hashed the same json = str(json.__signature__) return json, GuidedDecodingMode.JSON - elif request.guided_regex: - if not isinstance(request.guided_regex, str): - raise TypeError("Regex must be string") return request.guided_regex, GuidedDecodingMode.REGEX - elif request.guided_choice: - if not isinstance(request.guided_choice, list): - raise TypeError("Choices must be a list") - # choice just uses regex choices = [ regex_escape(str(choice)) for choice in request.guided_choice ] choices_regex = "(" + "|".join(choices) + ")" return choices_regex, GuidedDecodingMode.CHOICE - + elif request.guided_grammar: + return request.guided_grammar, GuidedDecodingMode.GRAMMAR + elif (request.response_format is not None + and request.response_format.type == "json_object"): + return JSON_GRAMMAR, GuidedDecodingMode.GRAMMAR else: return None, None @lru_cache(maxsize=32) -def _get_cached_logits_processor(guide: str, tokenizer, +def _get_cached_logits_processor(guide: str, + tokenizer: PreTrainedTokenizerBase, mode: GuidedDecodingMode): if mode == GuidedDecodingMode.JSON: return JSONLogitsProcessor(guide, tokenizer) elif mode == GuidedDecodingMode.REGEX or mode == GuidedDecodingMode.CHOICE: return RegexLogitsProcessor(guide, tokenizer) + elif mode == GuidedDecodingMode.GRAMMAR: + return CFGLogitsProcessor(guide, tokenizer) else: raise ValueError(f"Unknown guided decoding mode {mode}") diff --git a/vllm/model_executor/guided_logits_processors.py b/vllm/model_executor/guided_logits_processors.py index 
76d41aa37dd7b..2cd1ae1571065 100644 --- a/vllm/model_executor/guided_logits_processors.py +++ b/vllm/model_executor/guided_logits_processors.py @@ -16,30 +16,60 @@ import json import math from collections import defaultdict -from typing import Union, DefaultDict, Dict, List, Optional +from typing import Union, DefaultDict, Dict, List, Optional, Callable import torch from pydantic import BaseModel -from outlines.fsm.fsm import RegexFSM +from transformers import PreTrainedTokenizerBase +from outlines.fsm.fsm import RegexFSM, CFGFSM from outlines.fsm.json_schema import build_regex_from_schema -class RegexLogitsProcessor: +class BaseLogitsProcessor: - def __init__(self, regex_string: str, tokenizer): - """Compile the FSM that drives the regex-structured generation. + def adapt_tokenizer(self, tokenizer: PreTrainedTokenizerBase): + """Adapt vLLM's tokenizer to use to compile the FSM. - Parameters - ---------- - regex_string - A string that represents a regular expression - tokenizer - The model's tokenizer + The API of Outlines tokenizers is slightly different to that of + `transformers`. The decoder of outlines, returns a list whereas + the decode of vLLM returns an str. To sync the vLLM decoder with + outlines internal api, the decoder should be adapted. In addition + we need to handle the missing spaces to Llama's tokenizer to be + able to compile FSMs for this model. """ - tokenizer = self.adapt_tokenizer(tokenizer) - fsm = RegexFSM(regex_string, tokenizer) - self.fsm = fsm + if getattr(tokenizer, "_outlines_adapted", False): + return tokenizer + + tokenizer.vocabulary = tokenizer.get_vocab() + tokenizer.special_tokens = set(tokenizer.all_special_tokens) + + def convert_token_to_string(token: str) -> str: + from transformers.file_utils import SPIECE_UNDERLINE + + string = tokenizer.convert_tokens_to_string([token]) + + # A hack to handle missing spaces to HF's Llama tokenizers + if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>": + return " " + string + + return string + + def change_decoder( + decoder: Callable[[List[int]], str] + ) -> Callable[[List[int]], List[str]]: + """Sync vLLM's decoder with the outlines by returning list.""" + + def new_decoder(inp_tokens: List[int]) -> List[str]: + return [decoder(inp_tokens)] + + return new_decoder + + tokenizer.convert_token_to_string = convert_token_to_string + tokenizer.decode = change_decoder(tokenizer.decode) + setattr(tokenizer, "_outlines_adapted", True) # noqa: B010 + + return tokenizer def init_state(self): """Initialize the FSM states.""" @@ -69,38 +99,30 @@ def __call__(self, input_ids: List[int], return scores - def adapt_tokenizer(self, tokenizer): - """Adapt vLLM's tokenizer to use to compile the FSM. - - The API of Outlines tokenizers is slightly different to that of - `transformers`. In addition we need to handle the missing spaces to - Llama's tokenizer to be able to compile FSMs for this model. - - """ - tokenizer.vocabulary = tokenizer.get_vocab() - tokenizer.special_tokens = set(tokenizer.all_special_tokens) - - def convert_token_to_string(token: str) -> str: - from transformers.file_utils import SPIECE_UNDERLINE - string = tokenizer.convert_tokens_to_string([token]) +class RegexLogitsProcessor(BaseLogitsProcessor): - # A hack to handle missing spaces to HF's Llama tokenizers - if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>": - return " " + string - - return string + def __init__(self, regex_string: str, tokenizer: PreTrainedTokenizerBase): + """Compile the FSM that drives the regex-structured generation. 
- tokenizer.convert_token_to_string = convert_token_to_string + Parameters + ---------- + regex_string + A string that represents a regular expression + tokenizer + The model's tokenizer - return tokenizer + """ + tokenizer = self.adapt_tokenizer(tokenizer) + fsm = RegexFSM(regex_string, tokenizer) + self.fsm = fsm class JSONLogitsProcessor(RegexLogitsProcessor): def __init__(self, schema: Union[str, Dict, BaseModel], - tokenizer, + tokenizer: PreTrainedTokenizerBase, whitespace_pattern: Optional[str] = None): """Compile the FSM that drives the JSON-guided generation. @@ -130,3 +152,21 @@ def __init__(self, f"the JSON Schema specification") regex_string = build_regex_from_schema(schema_str, whitespace_pattern) super().__init__(regex_string, tokenizer) + + +class CFGLogitsProcessor(BaseLogitsProcessor): + + def __init__(self, cfg: str, tokenizer: PreTrainedTokenizerBase): + """Compile the FSM that drives the context free grammar generation. + + Parameters + ---------- + cfg + A string that represents a context-free grammar + tokenizer + The model's tokenizer + + """ + tokenizer = self.adapt_tokenizer(tokenizer) + fsm = CFGFSM(cfg, tokenizer) + self.fsm = fsm From 6b78837b29b5045a71e6ecfa68442b1f4fd2d0a6 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Sat, 16 Mar 2024 16:00:25 -0700 Subject: [PATCH 124/196] Fix setup.py neuron-ls issue (#2671) --- setup.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/setup.py b/setup.py index a7307949e9418..0531e1f01d33f 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,7 @@ import io import os import re +import shutil import subprocess import warnings from pathlib import Path @@ -38,6 +39,10 @@ # SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS) +def _is_cuda() -> bool: + return torch.version.cuda is not None + + def _is_hip() -> bool: return torch.version.hip is not None @@ -46,15 +51,11 @@ def _is_neuron() -> bool: torch_neuronx_installed = True try: subprocess.run(["neuron-ls"], capture_output=True, check=True) - except (FileNotFoundError, PermissionError): + except (FileNotFoundError, PermissionError, subprocess.CalledProcessError): torch_neuronx_installed = False return torch_neuronx_installed -def _is_cuda() -> bool: - return (torch.version.cuda is not None) and not _is_neuron() - - # Compiler flags. CXX_FLAGS = ["-g", "-O2", "-std=c++17"] # TODO(woosuk): Should we use -O3? 
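The guided-decoding patch above wires guided_grammar and a json_object response_format through the OpenAI-compatible server. A minimal client-side sketch mirroring the new server tests; the base URL, API key, and model name below are placeholders, not values fixed by the patch:

import json

import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# response_format=json_object constrains decoding with the built-in JSON grammar.
chat = client.chat.completions.create(
    model="MODEL_NAME",  # placeholder for the served model
    messages=[{
        "role": "user",
        "content": 'What is 1+1? Respond with a JSON object like {"result": 2}.',
    }],
    response_format={"type": "json_object"},
)
print(json.loads(chat.choices[0].message.content))

# guided_grammar accepts a Lark grammar via vLLM's extra_body extension;
# this grammar is the same one used in the new server test.
sql_grammar = """
start: select_statement

select_statement: "SELECT" column "from" table "where" condition

column: "col_1" | "col_2"
table: "table_1" | "table_2"
condition: column "=" number

number: "1" | "2"
"""
completion = client.completions.create(
    model="MODEL_NAME",
    prompt="Generate a sql statement that selects col_1 from table_1 where it equals 1:",
    max_tokens=128,
    extra_body=dict(guided_grammar=sql_grammar),
)
print(completion.choices[0].text)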
@@ -400,7 +401,12 @@ def find_version(filepath: str) -> str: def get_vllm_version() -> str: version = find_version(get_path("vllm", "__init__.py")) - if _is_hip(): + if _is_cuda(): + cuda_version = str(nvcc_cuda_version) + if cuda_version != MAIN_CUDA_VERSION: + cuda_version_str = cuda_version.replace(".", "")[:3] + version += f"+cu{cuda_version_str}" + elif _is_hip(): # Get the HIP version hipcc_version = get_hipcc_rocm_version() if hipcc_version != MAIN_CUDA_VERSION: @@ -412,13 +418,8 @@ def get_vllm_version() -> str: if neuron_version != MAIN_CUDA_VERSION: neuron_version_str = neuron_version.replace(".", "")[:3] version += f"+neuron{neuron_version_str}" - elif _is_cuda(): - cuda_version = str(nvcc_cuda_version) - if cuda_version != MAIN_CUDA_VERSION: - cuda_version_str = cuda_version.replace(".", "")[:3] - version += f"+cu{cuda_version_str}" else: - raise RuntimeError("Unknown runtime environment.") + raise RuntimeError("Unknown runtime environment") return version @@ -434,13 +435,7 @@ def read_readme() -> str: def get_requirements() -> List[str]: """Get Python package dependencies from requirements.txt.""" - if _is_hip(): - with open(get_path("requirements-rocm.txt")) as f: - requirements = f.read().strip().split("\n") - elif _is_neuron(): - with open(get_path("requirements-neuron.txt")) as f: - requirements = f.read().strip().split("\n") - else: + if _is_cuda(): with open(get_path("requirements.txt")) as f: requirements = f.read().strip().split("\n") if nvcc_cuda_version <= Version("11.8"): @@ -449,6 +444,16 @@ def get_requirements() -> List[str]: if requirements[i].startswith("cupy-cuda12x"): requirements[i] = "cupy-cuda11x" break + elif _is_hip(): + with open(get_path("requirements-rocm.txt")) as f: + requirements = f.read().strip().split("\n") + elif _is_neuron(): + with open(get_path("requirements-neuron.txt")) as f: + requirements = f.read().strip().split("\n") + else: + raise ValueError( + "Unsupported platform, please use CUDA, ROCM or Neuron.") + return requirements From abfc4f3387c436d46d6701e9ba916de8f9ed9329 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 17 Mar 2024 03:02:46 -0700 Subject: [PATCH 125/196] [Misc] Use dataclass for InputMetadata (#3452) Co-authored-by: youkaichao --- setup.py | 1 - vllm/model_executor/input_metadata.py | 49 ++++++++------------------- vllm/worker/model_runner.py | 37 ++++++-------------- 3 files changed, 24 insertions(+), 63 deletions(-) diff --git a/setup.py b/setup.py index 0531e1f01d33f..6f1f2faf54dbc 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ import io import os import re -import shutil import subprocess import warnings from pathlib import Path diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py index f0a88ac8e27f8..ebba0ba0a261a 100644 --- a/vllm/model_executor/input_metadata.py +++ b/vllm/model_executor/input_metadata.py @@ -1,8 +1,10 @@ +from dataclasses import dataclass from typing import Optional import torch +@dataclass class InputMetadata: """Metadata for input sequences. Used in PagedAttention. @@ -15,40 +17,17 @@ class InputMetadata: kv_cache_dtype: Data type to store kv cache. 
""" - def __init__( - self, - is_prompt: bool, - slot_mapping: torch.Tensor, - prompt_lens: Optional[torch.Tensor], - max_seq_len: Optional[int], - start_loc: Optional[torch.Tensor], - max_context_len: Optional[int], - context_lens: Optional[torch.Tensor], - block_tables: Optional[torch.Tensor], - use_cuda_graph: bool, - kv_cache_dtype: str, - ) -> None: - self.is_prompt = is_prompt - self.prompt_lens = prompt_lens - self.max_seq_len = max_seq_len - self.start_loc = start_loc - self.max_context_len = max_context_len - self.slot_mapping = slot_mapping - self.context_lens = context_lens - self.block_tables = block_tables - self.use_cuda_graph = use_cuda_graph - self.kv_cache_dtype = kv_cache_dtype + is_prompt: bool + slot_mapping: torch.Tensor + prompt_lens: Optional[torch.Tensor] + max_seq_len: Optional[int] + start_loc: Optional[torch.Tensor] + max_context_len: Optional[int] + context_lens: Optional[torch.Tensor] + block_tables: Optional[torch.Tensor] + use_cuda_graph: bool + kv_cache_dtype: str - # Set during the execution of the first attention op. - # FIXME(woosuk): This is a hack. + def __post_init__(self): + # will not appear in the __repr__ and __init__ self.attn_bias = None - - def __repr__(self) -> str: - return ("InputMetadata(" - f"is_prompt={self.is_prompt}, " - f"max_context_len={self.max_context_len}, " - f"slot_mapping={self.slot_mapping}, " - f"context_lens={self.context_lens}, " - f"block_tables={self.block_tables}, " - f"use_cuda_graph={self.use_cuda_graph}, " - f"kv_cache_dtype={self.kv_cache_dtype})") diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 7eac576e3f0fe..1ef783da6d08e 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1,4 +1,5 @@ import contextlib +import dataclasses import time from typing import Dict, List, Optional, Tuple, Set, Union @@ -521,45 +522,27 @@ def prepare_input_tensors( metadata_dict = { "input_tokens": input_tokens, "input_positions": input_positions, - "is_prompt": input_metadata.is_prompt, - "slot_mapping": input_metadata.slot_mapping, - "prompt_lens": input_metadata.prompt_lens, - "max_seq_len": input_metadata.max_seq_len, - "start_loc": input_metadata.start_loc, - "max_context_len": input_metadata.max_context_len, - "context_lens": input_metadata.context_lens, - "block_tables": input_metadata.block_tables, - "use_cuda_graph": input_metadata.use_cuda_graph, - "kv_cache_dtype": input_metadata.kv_cache_dtype, "selected_token_indices": sampling_metadata.selected_token_indices, "lora_requests": lora_requests, "lora_mapping": lora_mapping, } + metadata_dict.update(dataclasses.asdict(input_metadata)) broadcast_tensor_dict(metadata_dict, src=0) else: metadata_dict = broadcast_tensor_dict(src=0) - input_tokens = metadata_dict["input_tokens"] - input_positions = metadata_dict["input_positions"] - lora_mapping = metadata_dict["lora_mapping"] - lora_requests = metadata_dict["lora_requests"] - input_metadata = InputMetadata( - is_prompt=metadata_dict["is_prompt"], - slot_mapping=metadata_dict["slot_mapping"], - prompt_lens=metadata_dict["prompt_lens"], - max_seq_len=metadata_dict["max_seq_len"], - start_loc=metadata_dict["start_loc"], - max_context_len=metadata_dict["max_context_len"], - context_lens=metadata_dict["context_lens"], - block_tables=metadata_dict["block_tables"], - use_cuda_graph=metadata_dict["use_cuda_graph"], - kv_cache_dtype=metadata_dict["kv_cache_dtype"], - ) + input_tokens = metadata_dict.pop("input_tokens") + input_positions = metadata_dict.pop("input_positions") + 
selected_token_indices = metadata_dict.pop( + "selected_token_indices") + lora_mapping = metadata_dict.pop("lora_mapping") + lora_requests = metadata_dict.pop("lora_requests") + input_metadata = InputMetadata(**metadata_dict) sampling_metadata = SamplingMetadata( seq_groups=None, seq_data=None, prompt_lens=None, - selected_token_indices=metadata_dict["selected_token_indices"], + selected_token_indices=selected_token_indices, categorized_sample_indices=None, generators=None, perform_sampling=False, From 93348d9458af7517bb8c114611d438a1b4a2c3be Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Sun, 17 Mar 2024 14:56:30 -0700 Subject: [PATCH 126/196] [CI] Shard tests for LoRA and Kernels to speed up (#3445) --- .buildkite/test-pipeline.yaml | 11 ++++++----- .buildkite/test-template.j2 | 3 +++ requirements-dev.txt | 1 + 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 8badc16d0cb75..2c7dd9f304b9d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -33,9 +33,9 @@ steps: - label: Entrypoints Test command: pytest -v -s entrypoints -- label: Kernels Test - command: pytest -v -s kernels - soft_fail: true +- label: Kernels Test %N + command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + parallelism: 4 - label: Models Test commands: @@ -55,8 +55,9 @@ steps: - label: Speculative decoding tests command: pytest -v -s spec_decode -- label: LoRA Test - command: pytest -v -s lora --forked +- label: LoRA Test %N + command: pytest -v -s lora --forked --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + parallelism: 4 - label: Metrics Test command: pytest -v -s metrics diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index 7c1cf2b5a9b39..b5853a2f39383 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -20,6 +20,9 @@ steps: agents: queue: kubernetes soft_fail: {{ step.soft_fail or false }} + {% if step.parallelism %} + parallelism: {{ step.parallelism }} + {% endif %} retry: automatic: - exit_status: -1 # Agent was lost diff --git a/requirements-dev.txt b/requirements-dev.txt index 5502c97d014ac..51fa57f068003 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -16,6 +16,7 @@ pytest pytest-forked pytest-asyncio pytest-rerunfailures +pytest-shard httpx einops # required for MPT openai From 9101d832e6fe3811db8faa739f4a7e6e2f32a240 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 18 Mar 2024 11:26:24 -0700 Subject: [PATCH 127/196] [Bugfix] Make moe_align_block_size AMD-compatible (#3470) --- csrc/moe_align_block_size_kernels.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/csrc/moe_align_block_size_kernels.cu b/csrc/moe_align_block_size_kernels.cu index 138615a4bfba0..e01b23685ef4e 100644 --- a/csrc/moe_align_block_size_kernels.cu +++ b/csrc/moe_align_block_size_kernels.cu @@ -111,7 +111,8 @@ void moe_align_block_size( // set dynamic shared mem auto kernel = vllm::moe_align_block_size_kernel; - AT_CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_mem)); + AT_CUDA_CHECK( + VLLM_DevFuncAttribute_SET_MaxDynamicSharedMemorySize((void *)kernel, shared_mem)); kernel<<<1, num_experts, shared_mem, stream>>>( topk_ids.data_ptr(), sorted_token_ids.data_ptr(), From 8c654c045f73198a517becd8b1b23a9b16eae284 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 18 Mar 2024 12:33:47 -0700 Subject: [PATCH 128/196] 
CI: Add ROCm Docker Build (#2886) --- .buildkite/run-amd-test.sh | 38 +++++++++++++++++++++++++++++++++++++ .buildkite/test-template.j2 | 5 +++++ requirements-rocm.txt | 1 + 3 files changed, 44 insertions(+) create mode 100644 .buildkite/run-amd-test.sh diff --git a/.buildkite/run-amd-test.sh b/.buildkite/run-amd-test.sh new file mode 100644 index 0000000000000..83a56e25aca73 --- /dev/null +++ b/.buildkite/run-amd-test.sh @@ -0,0 +1,38 @@ +# This script build the ROCm docker image and run the API server inside the container. +# It serves a sanity check for compilation and basic model usage. +set -ex + +# Print ROCm version +rocminfo + +# Try building the docker image +docker build -t rocm -f Dockerfile.rocm . + +# Setup cleanup +remove_docker_container() { docker rm -f rocm || true; } +trap remove_docker_container EXIT +remove_docker_container + +# Run the image +docker run --device /dev/kfd --device /dev/dri --network host --name rocm rocm python3 -m vllm.entrypoints.api_server & + +# Wait for the server to start +wait_for_server_to_start() { + timeout=300 + counter=0 + + while [ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:8000/health)" != "200" ]; do + sleep 1 + counter=$((counter + 1)) + if [ $counter -ge $timeout ]; then + echo "Timeout after $timeout seconds" + break + fi + done +} +wait_for_server_to_start + +# Test a simple prompt +curl -X POST -H "Content-Type: application/json" \ + localhost:8000/generate \ + -d '{"prompt": "San Francisco is a"}' diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index b5853a2f39383..2ff58cc2e0d3c 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -3,6 +3,11 @@ {% set default_working_dir = "/vllm-workspace/tests" %} steps: + - label: "AMD Test" + agents: + queue: amd + command: bash .buildkite/run-amd-test.sh + - label: ":docker: build image" commands: - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ." diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 53bd11de7c9de..d5a3bd423b6b3 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -11,3 +11,4 @@ fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 +outlines == 0.0.34 \ No newline at end of file From 482b0adf1b689a3fb6cdd5374b57ac75f1591d6a Mon Sep 17 00:00:00 2001 From: Cade Daniel Date: Mon, 18 Mar 2024 12:48:45 -0700 Subject: [PATCH 129/196] [Testing] Add test_config.py to CI (#3437) --- .buildkite/test-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 2c7dd9f304b9d..6ae351130f203 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -28,7 +28,7 @@ steps: num_gpus: 2 # only support 1 or 2 for now. 
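The wait_for_server_to_start helper in the ROCm CI script above simply polls the server's /health endpoint until it answers HTTP 200 or a timeout expires. A rough Python equivalent of that loop; the URL and the 300 second budget mirror the shell script and are otherwise arbitrary:

import time
import urllib.error
import urllib.request


def wait_for_server(url: str = "http://localhost:8000/health",
                    timeout_s: float = 300.0) -> bool:
    """Poll `url` once a second until it returns HTTP 200 or time runs out."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:
                    return True
        except (urllib.error.URLError, ConnectionError):
            pass  # Server not accepting connections yet; retry.
        time.sleep(1)
    return False


if __name__ == "__main__":
    print("server ready" if wait_for_server() else "timed out waiting for server")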
- label: Engine Test - command: pytest -v -s engine tokenization test_sequence.py + command: pytest -v -s engine tokenization test_sequence.py test_config.py - label: Entrypoints Test command: pytest -v -s entrypoints From 097aa0ea220b45d82440a8072e8e3a2ce4631fdf Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:28:00 -0500 Subject: [PATCH 130/196] [CI/Build] Fix Bad Import In Test (#3473) --- tests/test_cache_block_hashing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py index fb541f38f3489..a3ca3548a37a6 100644 --- a/tests/test_cache_block_hashing.py +++ b/tests/test_cache_block_hashing.py @@ -7,7 +7,7 @@ import pytest from vllm.lora.request import LoRARequest -from vllm.transformers_utils.tokenizer import TokenizerGroup +from vllm.transformers_utils.tokenizer_group import TokenizerGroup from vllm.sequence import Sequence # Make two prefixes with different first blocks. From c0c17d489628591363ef486fe840d9308ff13dc9 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Mon, 18 Mar 2024 15:00:31 -0700 Subject: [PATCH 131/196] [Misc] Fix PR Template (#3478) --- .github/PULL_REQUEST_TEMPLATE.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 46fda7eeef55e..262ce8e1530a8 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,14 @@ +FILL IN THE PR DESCRIPTION HERE + +FIX #xxxx (*link existing issues this PR will resolve*) + +**BEFORE SUBMITTING, PLEASE READ THE CHECKLIST BELOW AND FILL IN THE DESCRIPTION ABOVE** + +--- +
-<summary> PR Checklist (Click to expand. Please read before submitting.) </summary>
+<summary> PR Checklist (Click to Expand) </summary>

Thank you for your contribution to vLLM! Before submitting the pull request, please ensure the PR meets the following criteria. This helps vLLM maintain the code quality and improve the efficiency of the review process.

@@ -53,8 +61,4 @@
---- - -Please provide a brief explanation of the motivation behind the PR and the changes it introduces. This helps reviewers understand the context and rationale for the contribution. If possible, please link existing issues this PR will resolve. - From 9fdf3de346836e88b310e53b50e7947974fde1d3 Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Mon, 18 Mar 2024 18:38:33 -0400 Subject: [PATCH 132/196] Cmake based build system (#2830) --- CMakeLists.txt | 279 ++++++++++++++++++++++++ Dockerfile | 2 + MANIFEST.in | 2 + cmake/hipify.py | 73 +++++++ cmake/utils.cmake | 334 +++++++++++++++++++++++++++++ pyproject.toml | 1 + requirements-build.txt | 3 +- requirements-rocm.txt | 1 + requirements.txt | 1 + setup.py | 474 +++++++++++++++-------------------------- 10 files changed, 868 insertions(+), 302 deletions(-) create mode 100644 CMakeLists.txt create mode 100755 cmake/hipify.py create mode 100644 cmake/utils.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000..29a531d44a9d5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,279 @@ +cmake_minimum_required(VERSION 3.21) + +project(vllm_extensions LANGUAGES CXX) + +message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") + +include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake) + +# +# Supported python versions. These versions will be searched in order, the +# first match will be selected. These should be kept in sync with setup.py. +# +set(PYTHON_SUPPORTED_VERSIONS "3.8" "3.9" "3.10" "3.11") + +# Supported NVIDIA architectures. +set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.9;9.0") + +# Supported AMD GPU architectures. +set(HIP_SUPPORTED_ARCHS "gfx908;gfx90a;gfx942;gfx1100") + +# +# Supported/expected torch versions for CUDA/ROCm. +# +# Currently, having an incorrect pytorch version results in a warning +# rather than an error. +# +# Note: the CUDA torch version is derived from pyproject.toml and various +# requirements.txt files and should be kept consistent. The ROCm torch +# versions are derived from Dockerfile.rocm +# +set(TORCH_SUPPORTED_VERSION_CUDA "2.1.2") +set(TORCH_SUPPORTED_VERSION_ROCM_5X "2.0.1") +set(TORCH_SUPPORTED_VERSION_ROCM_6X "2.1.1") + +# +# Try to find python package with an executable that exactly matches +# `VLLM_PYTHON_EXECUTABLE` and is one of the supported versions. +# +if (VLLM_PYTHON_EXECUTABLE) + find_python_from_executable(${VLLM_PYTHON_EXECUTABLE} "${PYTHON_SUPPORTED_VERSIONS}") +else() + message(FATAL_ERROR + "Please set VLLM_PYTHON_EXECUTABLE to the path of the desired python version" + " before running cmake configure.") +endif() + +# +# Update cmake's `CMAKE_PREFIX_PATH` with torch location. +# +append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") + +# +# Import torch cmake configuration. +# Torch also imports CUDA (and partially HIP) languages with some customizations, +# so there is no need to do this explicitly with check_language/enable_language, +# etc. +# +find_package(Torch REQUIRED) + +# +# Normally `torch.utils.cpp_extension.CUDAExtension` would add +# `libtorch_python.so` for linking against an extension. Torch's cmake +# configuration does not include this library (presumably since the cmake +# config is used for standalone C++ binaries that link against torch). +# The `libtorch_python.so` library defines some of the glue code between +# torch/python via pybind and is required by VLLM extensions for this +# reason. So, add it by manually using `append_torchlib_if_found` from +# torch's cmake setup. 
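The append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") call above shells out to the interpreter selected via VLLM_PYTHON_EXECUTABLE so CMake can locate torch's package config. Run by hand, the query it evaluates amounts to this one-liner, assuming torch is importable in that environment:

# What CMake's run_python helper effectively executes to extend CMAKE_PREFIX_PATH.
import torch

print(torch.utils.cmake_prefix_path)  # e.g. <site-packages>/torch/share/cmake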
+# +append_torchlib_if_found(torch_python) + +# +# Set up GPU language and check the torch version and warn if it isn't +# what is expected. +# +if (NOT HIP_FOUND AND CUDA_FOUND) + set(VLLM_GPU_LANG "CUDA") + + if (NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_CUDA}) + message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_CUDA} " + "expected for CUDA build, saw ${Torch_VERSION} instead.") + endif() +elseif(HIP_FOUND) + set(VLLM_GPU_LANG "HIP") + + # Importing torch recognizes and sets up some HIP/ROCm configuration but does + # not let cmake recognize .hip files. In order to get cmake to understand the + # .hip extension automatically, HIP must be enabled explicitly. + enable_language(HIP) + + # ROCm 5.x + if (ROCM_VERSION_DEV_MAJOR EQUAL 5 AND + NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_5X}) + message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_5X} " + "expected for ROCMm 5.x build, saw ${Torch_VERSION} instead.") + endif() + + # ROCm 6.x + if (ROCM_VERSION_DEV_MAJOR EQUAL 6 AND + NOT Torch_VERSION VERSION_EQUAL ${TORCH_SUPPORTED_VERSION_ROCM_6X}) + message(WARNING "Pytorch version ${TORCH_SUPPORTED_VERSION_ROCM_6X} " + "expected for ROCMm 6.x build, saw ${Torch_VERSION} instead.") + endif() +else() + message(FATAL_ERROR "Can't find CUDA or HIP installation.") +endif() + +# +# Override the GPU architectures detected by cmake/torch and filter them by +# the supported versions for the current language. +# The final set of arches is stored in `VLLM_GPU_ARCHES`. +# +override_gpu_arches(VLLM_GPU_ARCHES + ${VLLM_GPU_LANG} + "${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}") + +# +# Query torch for additional GPU compilation flags for the given +# `VLLM_GPU_LANG`. +# The final set of arches is stored in `VLLM_GPU_FLAGS`. +# +get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG}) + +# +# Set nvcc parallelism. 
+# +if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA") + list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}") +endif() + +# +# Define extension targets +# + +# +# _C extension +# + +set(VLLM_EXT_SRC + "csrc/cache_kernels.cu" + "csrc/attention/attention_kernels.cu" + "csrc/pos_encoding_kernels.cu" + "csrc/activation_kernels.cu" + "csrc/layernorm_kernels.cu" + "csrc/quantization/squeezellm/quant_cuda_kernel.cu" + "csrc/quantization/gptq/q_gemm.cu" + "csrc/cuda_utils_kernels.cu" + "csrc/moe_align_block_size_kernels.cu" + "csrc/pybind.cpp") + +if(VLLM_GPU_LANG STREQUAL "CUDA") + list(APPEND VLLM_EXT_SRC + "csrc/quantization/awq/gemm_kernels.cu" + "csrc/quantization/marlin/marlin_cuda_kernel.cu" + "csrc/custom_all_reduce.cu") +endif() + +define_gpu_extension_target( + _C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${VLLM_EXT_SRC} + COMPILE_FLAGS ${VLLM_GPU_FLAGS} + ARCHITECTURES ${VLLM_GPU_ARCHES} + WITH_SOABI) + +# +# _moe_C extension +# + +set(VLLM_MOE_EXT_SRC + "csrc/moe/moe_ops.cpp" + "csrc/moe/topk_softmax_kernels.cu") + +define_gpu_extension_target( + _moe_C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${VLLM_MOE_EXT_SRC} + COMPILE_FLAGS ${VLLM_GPU_FLAGS} + ARCHITECTURES ${VLLM_GPU_ARCHES} + WITH_SOABI) + +# +# _punica_C extension +# + +set(VLLM_PUNICA_EXT_SRC + "csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu" + "csrc/punica/bgmv/bgmv_bf16_bf16_fp16.cu" + "csrc/punica/bgmv/bgmv_bf16_fp16_bf16.cu" + "csrc/punica/bgmv/bgmv_bf16_fp16_fp16.cu" + "csrc/punica/bgmv/bgmv_bf16_fp32_bf16.cu" + "csrc/punica/bgmv/bgmv_bf16_fp32_fp16.cu" + "csrc/punica/bgmv/bgmv_fp16_bf16_bf16.cu" + "csrc/punica/bgmv/bgmv_fp16_bf16_fp16.cu" + "csrc/punica/bgmv/bgmv_fp16_fp16_bf16.cu" + "csrc/punica/bgmv/bgmv_fp16_fp16_fp16.cu" + "csrc/punica/bgmv/bgmv_fp16_fp32_bf16.cu" + "csrc/punica/bgmv/bgmv_fp16_fp32_fp16.cu" + "csrc/punica/bgmv/bgmv_fp32_bf16_bf16.cu" + "csrc/punica/bgmv/bgmv_fp32_bf16_fp16.cu" + "csrc/punica/bgmv/bgmv_fp32_fp16_bf16.cu" + "csrc/punica/bgmv/bgmv_fp32_fp16_fp16.cu" + "csrc/punica/bgmv/bgmv_fp32_fp32_bf16.cu" + "csrc/punica/bgmv/bgmv_fp32_fp32_fp16.cu" + "csrc/punica/punica_ops.cc") + +# +# Copy GPU compilation flags+update for punica +# +set(VLLM_PUNICA_GPU_FLAGS ${VLLM_GPU_FLAGS}) +list(REMOVE_ITEM VLLM_PUNICA_GPU_FLAGS + "-D__CUDA_NO_HALF_OPERATORS__" + "-D__CUDA_NO_HALF_CONVERSIONS__" + "-D__CUDA_NO_BFLOAT16_CONVERSIONS__" + "-D__CUDA_NO_HALF2_OPERATORS__") + +# +# Filter out CUDA architectures < 8.0 for punica. +# +if (${VLLM_GPU_LANG} STREQUAL "CUDA") + set(VLLM_PUNICA_GPU_ARCHES) + foreach(ARCH ${VLLM_GPU_ARCHES}) + string_to_ver(CODE_VER ${ARCH}) + if (CODE_VER GREATER_EQUAL 8.0) + list(APPEND VLLM_PUNICA_GPU_ARCHES ${ARCH}) + endif() + endforeach() + message(STATUS "Punica target arches: ${VLLM_PUNICA_GPU_ARCHES}") +endif() + +if (VLLM_PUNICA_GPU_ARCHES) + define_gpu_extension_target( + _punica_C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${VLLM_PUNICA_EXT_SRC} + COMPILE_FLAGS ${VLLM_PUNICA_GPU_FLAGS} + ARCHITECTURES ${VLLM_PUNICA_GPU_ARCHES} + WITH_SOABI) +else() + message(WARNING "Unable to create _punica_C target because none of the " + "requested architectures (${VLLM_GPU_ARCHES}) are supported, i.e. >= 8.0") +endif() + +# +# Add the `default` target which detects which extensions should be +# built based on platform/architecture. This is the same logic that +# setup.py uses to select which extensions should be built and should +# be kept in sync. 
+# +# The `default` target makes direct use of cmake easier since knowledge +# of which extensions are supported has been factored in, e.g. +# +# mkdir build && cd build +# cmake -G Ninja -DVLLM_PYTHON_EXECUTABLE=`which python3` -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=../vllm .. +# cmake --build . --target default +# +add_custom_target(default) + +if(VLLM_GPU_LANG STREQUAL "CUDA" OR VLLM_GPU_LANG STREQUAL "HIP") + message(STATUS "Enabling C extension.") + add_dependencies(default _C) +endif() + +if(VLLM_GPU_LANG STREQUAL "CUDA") + message(STATUS "Enabling moe extension.") + add_dependencies(default _moe_C) + + # Enable punica if -DVLLM_INSTALL_PUNICA_KERNELS=ON or + # VLLM_INSTALL_PUNICA_KERNELS is set in the environment and + # there are supported target arches. + if (VLLM_PUNICA_GPU_ARCHES AND + (ENV{VLLM_INSTALL_PUNICA_KERNELS} OR VLLM_INSTALL_PUNICA_KERNELS)) + message(STATUS "Enabling punica extension.") + add_dependencies(default _punica_C) + endif() +endif() diff --git a/Dockerfile b/Dockerfile index 8be03b3567f0e..6a56a33cfe7ac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,6 +38,8 @@ RUN --mount=type=cache,target=/root/.cache/pip \ # copy input files COPY csrc csrc COPY setup.py setup.py +COPY cmake cmake +COPY CMakeLists.txt CMakeLists.txt COPY requirements.txt requirements.txt COPY pyproject.toml pyproject.toml COPY vllm/__init__.py vllm/__init__.py diff --git a/MANIFEST.in b/MANIFEST.in index 0c897cf147f10..aa16da6500e6c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,6 @@ include LICENSE include requirements.txt +include CMakeLists.txt +recursive-include cmake * recursive-include csrc * diff --git a/cmake/hipify.py b/cmake/hipify.py new file mode 100755 index 0000000000000..c4d8450630ba3 --- /dev/null +++ b/cmake/hipify.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# +# A command line tool for running pytorch's hipify preprocessor on CUDA +# source files. +# +# See https://github.com/ROCm/hipify_torch +# and /utils/hipify/hipify_python.py +# + +import argparse +import shutil +import os + +from torch.utils.hipify.hipify_python import hipify + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + # Project directory where all the source + include files live. + parser.add_argument( + "-p", + "--project_dir", + help="The project directory.", + ) + + # Directory where hipified files are written. + parser.add_argument( + "-o", + "--output_dir", + help="The output directory.", + ) + + # Source files to convert. + parser.add_argument("sources", + help="Source files to hipify.", + nargs="*", + default=[]) + + args = parser.parse_args() + + # Limit include scope to project_dir only + includes = [os.path.join(args.project_dir, '*')] + + # Get absolute path for all source files. + extra_files = [os.path.abspath(s) for s in args.sources] + + # Copy sources from project directory to output directory. + # The directory might already exist to hold object files so we ignore that. 
+ shutil.copytree(args.project_dir, args.output_dir, dirs_exist_ok=True) + + hipify_result = hipify(project_directory=args.project_dir, + output_directory=args.output_dir, + header_include_dirs=[], + includes=includes, + extra_files=extra_files, + show_detailed=True, + is_pytorch_extension=True, + hipify_extra_files_only=True) + + hipified_sources = [] + for source in args.sources: + s_abs = os.path.abspath(source) + hipified_s_abs = (hipify_result[s_abs].hipified_path if + (s_abs in hipify_result + and hipify_result[s_abs].hipified_path is not None) + else s_abs) + hipified_sources.append(hipified_s_abs) + + assert (len(hipified_sources) == len(args.sources)) + + # Print hipified source files. + print("\n".join(hipified_sources)) diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 0000000000000..bb222bb437b1d --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,334 @@ +# +# Attempt to find the python package that uses the same python executable as +# `EXECUTABLE` and is one of the `SUPPORTED_VERSIONS`. +# +macro (find_python_from_executable EXECUTABLE SUPPORTED_VERSIONS) + file(REAL_PATH ${EXECUTABLE} EXECUTABLE) + set(Python_EXECUTABLE ${EXECUTABLE}) + find_package(Python COMPONENTS Interpreter Development.Module) + if (NOT Python_FOUND) + message(FATAL_ERROR "Unable to find python matching: ${EXECUTABLE}.") + endif() + set(_VER "${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}") + set(_SUPPORTED_VERSIONS_LIST ${SUPPORTED_VERSIONS} ${ARGN}) + if (NOT _VER IN_LIST _SUPPORTED_VERSIONS_LIST) + message(FATAL_ERROR + "Python version (${_VER}) is not one of the supported versions: " + "${_SUPPORTED_VERSIONS_LIST}.") + endif() + message(STATUS "Found python matching: ${EXECUTABLE}.") +endmacro() + +# +# Run `EXPR` in python. The standard output of python is stored in `OUT` and +# has trailing whitespace stripped. If an error is encountered when running +# python, a fatal message `ERR_MSG` is issued. +# +function (run_python OUT EXPR ERR_MSG) + execute_process( + COMMAND + "${Python_EXECUTABLE}" "-c" "${EXPR}" + OUTPUT_VARIABLE PYTHON_OUT + RESULT_VARIABLE PYTHON_ERROR_CODE + ERROR_VARIABLE PYTHON_STDERR + OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(NOT PYTHON_ERROR_CODE EQUAL 0) + message(FATAL_ERROR "${ERR_MSG}: ${PYTHON_STDERR}") + endif() + set(${OUT} ${PYTHON_OUT} PARENT_SCOPE) +endfunction() + +# Run `EXPR` in python after importing `PKG`. Use the result of this to extend +# `CMAKE_PREFIX_PATH` so the torch cmake configuration can be imported. +macro (append_cmake_prefix_path PKG EXPR) + run_python(_PREFIX_PATH + "import ${PKG}; print(${EXPR})" "Failed to locate ${PKG} path") + list(APPEND CMAKE_PREFIX_PATH ${_PREFIX_PATH}) +endmacro() + +# +# Add a target named `hipify${NAME}` that runs the hipify preprocessor on a set +# of CUDA source files. The names of the corresponding "hipified" sources are +# stored in `OUT_SRCS`. +# +function (hipify_sources_target OUT_SRCS NAME ORIG_SRCS) + # + # Split into C++ and non-C++ (i.e. CUDA) sources. + # + set(SRCS ${ORIG_SRCS}) + set(CXX_SRCS ${ORIG_SRCS}) + list(FILTER SRCS EXCLUDE REGEX "\.(cc)|(cpp)$") + list(FILTER CXX_SRCS INCLUDE REGEX "\.(cc)|(cpp)$") + + # + # Generate ROCm/HIP source file names from CUDA file names. + # Since HIP files are generated code, they will appear in the build area + # `CMAKE_CURRENT_BINARY_DIR` directory rather than the original csrc dir. 
+ # + set(HIP_SRCS) + foreach (SRC ${SRCS}) + string(REGEX REPLACE "\.cu$" "\.hip" SRC ${SRC}) + string(REGEX REPLACE "cuda" "hip" SRC ${SRC}) + list(APPEND HIP_SRCS "${CMAKE_CURRENT_BINARY_DIR}/${SRC}") + endforeach() + + set(CSRC_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/csrc) + add_custom_target( + hipify${NAME} + COMMAND ${CMAKE_SOURCE_DIR}/cmake/hipify.py -p ${CMAKE_SOURCE_DIR}/csrc -o ${CSRC_BUILD_DIR} ${SRCS} + DEPENDS ${CMAKE_SOURCE_DIR}/cmake/hipify.py ${SRCS} + BYPRODUCTS ${HIP_SRCS} + COMMENT "Running hipify on ${NAME} extension source files.") + + # Swap out original extension sources with hipified sources. + list(APPEND HIP_SRCS ${CXX_SRCS}) + set(${OUT_SRCS} ${HIP_SRCS} PARENT_SCOPE) +endfunction() + +# +# Get additional GPU compiler flags from torch. +# +function (get_torch_gpu_compiler_flags OUT_GPU_FLAGS GPU_LANG) + if (${GPU_LANG} STREQUAL "CUDA") + # + # Get common NVCC flags from torch. + # + run_python(GPU_FLAGS + "from torch.utils.cpp_extension import COMMON_NVCC_FLAGS; print(';'.join(COMMON_NVCC_FLAGS))" + "Failed to determine torch nvcc compiler flags") + + if (CUDA_VERSION VERSION_GREATER_EQUAL 11.8) + list(APPEND GPU_FLAGS "-DENABLE_FP8_E5M2") + endif() + + elseif(${GPU_LANG} STREQUAL "HIP") + # + # Get common HIP/HIPCC flags from torch. + # + run_python(GPU_FLAGS + "import torch.utils.cpp_extension as t; print(';'.join(t.COMMON_HIP_FLAGS + t.COMMON_HIPCC_FLAGS))" + "Failed to determine torch nvcc compiler flags") + + list(APPEND GPU_FLAGS + "-DUSE_ROCM" + "-U__HIP_NO_HALF_CONVERSIONS__" + "-U__HIP_NO_HALF_OPERATORS__" + "-fno-gpu-rdc") + + endif() + set(${OUT_GPU_FLAGS} ${GPU_FLAGS} PARENT_SCOPE) +endfunction() + +# Macro for converting a `gencode` version number to a cmake version number. +macro(string_to_ver OUT_VER IN_STR) + string(REGEX REPLACE "\([0-9]+\)\([0-9]\)" "\\1.\\2" ${OUT_VER} ${IN_STR}) +endmacro() + +# +# Override the GPU architectures detected by cmake/torch and filter them by +# `GPU_SUPPORTED_ARCHES`. Sets the final set of architectures in +# `GPU_ARCHES`. +# +# Note: this is defined as a macro since it updates `CMAKE_CUDA_FLAGS`. +# +macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES) + set(_GPU_SUPPORTED_ARCHES_LIST ${GPU_SUPPORTED_ARCHES} ${ARGN}) + message(STATUS "${GPU_LANG} supported arches: ${_GPU_SUPPORTED_ARCHES_LIST}") + + if (${GPU_LANG} STREQUAL "HIP") + # + # `GPU_ARCHES` controls the `--offload-arch` flags. + # `CMAKE_HIP_ARCHITECTURES` is set up by torch and can be controlled + # via the `PYTORCH_ROCM_ARCH` env variable. + # + + # + # Find the intersection of the supported + detected architectures to + # set the module architecture flags. + # + set(${GPU_ARCHES}) + foreach (_ARCH ${CMAKE_HIP_ARCHITECTURES}) + if (_ARCH IN_LIST _GPU_SUPPORTED_ARCHES_LIST) + list(APPEND ${GPU_ARCHES} ${_ARCH}) + endif() + endforeach() + + if(NOT ${GPU_ARCHES}) + message(FATAL_ERROR + "None of the detected ROCm architectures: ${CMAKE_HIP_ARCHITECTURES} is" + " supported. Supported ROCm architectures are: ${_GPU_SUPPORTED_ARCHES_LIST}.") + endif() + + elseif(${GPU_LANG} STREQUAL "CUDA") + # + # Setup/process CUDA arch flags. + # + # The torch cmake setup hardcodes the detected architecture flags in + # `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it + # can't modified on a per-target basis, e.g. for the `punica` extension. + # So, all the `-gencode` flags need to be extracted and removed from + # `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method. 
+ # Since it's not possible to use `target_compiler_options` for adding target + # specific `-gencode` arguments, the target's `CUDA_ARCHITECTURES` property + # must be used instead. This requires repackaging the architecture flags + # into a format that cmake expects for `CUDA_ARCHITECTURES`. + # + # This is a bit fragile in that it depends on torch using `-gencode` as opposed + # to one of the other nvcc options to specify architectures. + # + # Note: torch uses the `TORCH_CUDA_ARCH_LIST` environment variable to override + # detected architectures. + # + message(DEBUG "initial CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") + + # Extract all `-gencode` flags from `CMAKE_CUDA_FLAGS` + string(REGEX MATCHALL "-gencode arch=[^ ]+" _CUDA_ARCH_FLAGS + ${CMAKE_CUDA_FLAGS}) + + # Remove all `-gencode` flags from `CMAKE_CUDA_FLAGS` since they will be modified + # and passed back via the `CUDA_ARCHITECTURES` property. + string(REGEX REPLACE "-gencode arch=[^ ]+ *" "" CMAKE_CUDA_FLAGS + ${CMAKE_CUDA_FLAGS}) + + # If this error is triggered, it might mean that torch has changed how it sets + # up nvcc architecture code generation flags. + if (NOT _CUDA_ARCH_FLAGS) + message(FATAL_ERROR + "Could not find any architecture related code generation flags in " + "CMAKE_CUDA_FLAGS. (${CMAKE_CUDA_FLAGS})") + endif() + + message(DEBUG "final CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") + message(DEBUG "arch flags: ${_CUDA_ARCH_FLAGS}") + + # Initialize the architecture lists to empty. + set(${GPU_ARCHES}) + + # Process each `gencode` flag. + foreach(_ARCH ${_CUDA_ARCH_FLAGS}) + # For each flag, extract the version number and whether it refers to PTX + # or native code. + # Note: if a regex matches then `CMAKE_MATCH_1` holds the binding + # for that match. + + string(REGEX MATCH "arch=compute_\([0-9]+a?\)" _COMPUTE ${_ARCH}) + if (_COMPUTE) + set(_COMPUTE ${CMAKE_MATCH_1}) + endif() + + string(REGEX MATCH "code=sm_\([0-9]+a?\)" _SM ${_ARCH}) + if (_SM) + set(_SM ${CMAKE_MATCH_1}) + endif() + + string(REGEX MATCH "code=compute_\([0-9]+a?\)" _CODE ${_ARCH}) + if (_CODE) + set(_CODE ${CMAKE_MATCH_1}) + endif() + + # Make sure the virtual architecture can be matched. + if (NOT _COMPUTE) + message(FATAL_ERROR + "Could not determine virtual architecture from: ${_ARCH}.") + endif() + + # One of sm_ or compute_ must exist. + if ((NOT _SM) AND (NOT _CODE)) + message(FATAL_ERROR + "Could not determine a codegen architecture from: ${_ARCH}.") + endif() + + if (_SM) + set(_VIRT "") + set(_CODE_ARCH ${_SM}) + else() + set(_VIRT "-virtual") + set(_CODE_ARCH ${_CODE}) + endif() + + # Check if the current version is in the supported arch list. + string_to_ver(_CODE_VER ${_CODE_ARCH}) + if (NOT _CODE_VER IN_LIST _GPU_SUPPORTED_ARCHES_LIST) + message(STATUS "discarding unsupported CUDA arch ${_VER}.") + continue() + endif() + + # Add it to the arch list. + list(APPEND ${GPU_ARCHES} "${_CODE_ARCH}${_VIRT}") + endforeach() + endif() + message(STATUS "${GPU_LANG} target arches: ${${GPU_ARCHES}}") +endmacro() + +# +# Define a target named `GPU_MOD_NAME` for a single extension. The +# arguments are: +# +# DESTINATION - Module destination directory. +# LANGUAGE - The GPU language for this module, e.g CUDA, HIP, +# etc. +# SOURCES - List of source files relative to CMakeLists.txt +# directory. +# +# Optional arguments: +# +# ARCHITECTURES - A list of target GPU architectures in cmake +# format. +# Refer `CMAKE_CUDA_ARCHITECTURES` documentation +# and `CMAKE_HIP_ARCHITECTURES` for more info. 
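The override_gpu_arches macro above repackages torch's -gencode flags into the form CUDA_ARCHITECTURES expects ("80" for real SASS output, "80-virtual" for PTX-only). A small illustrative Python rendering of that rewriting, using a made-up flag string:

# Illustrative only: mirrors how the macro turns -gencode flags into
# CUDA_ARCHITECTURES entries. The input string below is a made-up example.
import re

cmake_cuda_flags = ("-gencode arch=compute_80,code=sm_80 "
                    "-gencode arch=compute_90,code=compute_90")

arches = []
for flag in re.findall(r"-gencode arch=[^ ]+", cmake_cuda_flags):
    real = re.search(r"code=sm_([0-9]+a?)", flag)
    virtual = re.search(r"code=compute_([0-9]+a?)", flag)
    if real:                       # SASS for this arch -> plain entry
        arches.append(real.group(1))
    elif virtual:                  # PTX only -> "-virtual" suffix
        arches.append(virtual.group(1) + "-virtual")

print(arches)  # ['80', '90-virtual']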
+# ARCHITECTURES will use cmake's defaults if +# not provided. +# COMPILE_FLAGS - Extra compiler flags passed to NVCC/hip. +# INCLUDE_DIRECTORIES - Extra include directories. +# LINK_LIBRARIES - Extra link libraries. +# WITH_SOABI - Generate library with python SOABI suffix name. +# +# Note: optimization level/debug info is set via cmake build type. +# +function (define_gpu_extension_target GPU_MOD_NAME) + cmake_parse_arguments(PARSE_ARGV 1 + GPU + "WITH_SOABI" + "DESTINATION;LANGUAGE" + "SOURCES;ARCHITECTURES;COMPILE_FLAGS;INCLUDE_DIRECTORIES;LIBRARIES") + + # Add hipify preprocessing step when building with HIP/ROCm. + if (GPU_LANGUAGE STREQUAL "HIP") + hipify_sources_target(GPU_SOURCES ${GPU_MOD_NAME} "${GPU_SOURCES}") + endif() + + if (GPU_WITH_SOABI) + set(GPU_WITH_SOABI WITH_SOABI) + else() + set(GPU_WITH_SOABI) + endif() + + Python_add_library(${GPU_MOD_NAME} MODULE "${GPU_SOURCES}" ${GPU_WITH_SOABI}) + + if (GPU_LANGUAGE STREQUAL "HIP") + # Make this target dependent on the hipify preprocessor step. + add_dependencies(${GPU_MOD_NAME} hipify${GPU_MOD_NAME}) + endif() + + if (GPU_ARCHITECTURES) + set_target_properties(${GPU_MOD_NAME} PROPERTIES + ${GPU_LANGUAGE}_ARCHITECTURES "${GPU_ARCHITECTURES}") + endif() + + set_property(TARGET ${GPU_MOD_NAME} PROPERTY CXX_STANDARD 17) + + target_compile_options(${GPU_MOD_NAME} PRIVATE + $<$:${GPU_COMPILE_FLAGS}>) + + target_compile_definitions(${GPU_MOD_NAME} PRIVATE + "-DTORCH_EXTENSION_NAME=${GPU_MOD_NAME}") + + target_include_directories(${GPU_MOD_NAME} PRIVATE csrc + ${GPU_INCLUDE_DIRECTORIES}) + + target_link_libraries(${GPU_MOD_NAME} PRIVATE ${TORCH_LIBRARIES} + ${GPU_LIBRARIES}) + + install(TARGETS ${GPU_MOD_NAME} LIBRARY DESTINATION ${GPU_DESTINATION}) +endfunction() diff --git a/pyproject.toml b/pyproject.toml index e0a01215ef997..b6d7649477dcc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [build-system] # Should be mirrored in requirements-build.txt requires = [ + "cmake>=3.21", "ninja", "packaging", "setuptools >= 49.4.0", diff --git a/requirements-build.txt b/requirements-build.txt index 7e7e48a1313e5..a8efcde590bbf 100644 --- a/requirements-build.txt +++ b/requirements-build.txt @@ -1,6 +1,7 @@ # Should be mirrored in pyproject.toml +cmake>=3.21 ninja packaging setuptools>=49.4.0 torch==2.1.2 -wheel \ No newline at end of file +wheel diff --git a/requirements-rocm.txt b/requirements-rocm.txt index d5a3bd423b6b3..c30479e40f521 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1,3 +1,4 @@ +cmake>=3.21 ninja # For faster builds. typing-extensions>=4.8.0 starlette diff --git a/requirements.txt b/requirements.txt index d6c33ad85da58..c9a5bd6619402 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +cmake>=3.21 ninja # For faster builds. 
psutil ray >= 2.9 diff --git a/setup.py b/setup.py index 6f1f2faf54dbc..88787334be21a 100644 --- a/setup.py +++ b/setup.py @@ -1,23 +1,16 @@ -import contextlib import io import os import re import subprocess -import warnings -from pathlib import Path -from typing import List, Set +import sys +from typing import List from packaging.version import parse, Version -import setuptools -import sys +from setuptools import setup, find_packages, Extension +from setuptools.command.build_ext import build_ext +from shutil import which import torch -import torch.utils.cpp_extension as torch_cpp_ext -from torch.utils.cpp_extension import ( - BuildExtension, - CUDAExtension, - CUDA_HOME, - ROCM_HOME, -) +from torch.utils.cpp_extension import CUDA_HOME ROOT_DIR = os.path.dirname(__file__) @@ -25,17 +18,153 @@ assert sys.platform.startswith( "linux"), "vLLM only supports Linux platform (including WSL)." -# If you are developing the C++ backend of vLLM, consider building vLLM with -# `python setup.py develop` since it will give you incremental builds. -# The downside is that this method is deprecated, see -# https://github.com/pypa/setuptools/issues/917 - MAIN_CUDA_VERSION = "12.1" -# Supported NVIDIA GPU architectures. -NVIDIA_SUPPORTED_ARCHS = {"7.0", "7.5", "8.0", "8.6", "8.9", "9.0"} -ROCM_SUPPORTED_ARCHS = {"gfx908", "gfx90a", "gfx942", "gfx1100"} -# SUPPORTED_ARCHS = NVIDIA_SUPPORTED_ARCHS.union(ROCM_SUPPORTED_ARCHS) + +def is_sccache_available() -> bool: + return which("sccache") is not None + + +def is_ccache_available() -> bool: + return which("ccache") is not None + + +def is_ninja_available() -> bool: + return which("ninja") is not None + + +def remove_prefix(text, prefix): + if text.startswith(prefix): + return text[len(prefix):] + return text + + +class CMakeExtension(Extension): + + def __init__(self, name: str, cmake_lists_dir: str = '.', **kwa) -> None: + super().__init__(name, sources=[], **kwa) + self.cmake_lists_dir = os.path.abspath(cmake_lists_dir) + + +class cmake_build_ext(build_ext): + # A dict of extension directories that have been configured. + did_config = {} + + # + # Determine number of compilation jobs and optionally nvcc compile threads. + # + def compute_num_jobs(self): + try: + # os.sched_getaffinity() isn't universally available, so fall back + # to os.cpu_count() if we get an error here. + num_jobs = len(os.sched_getaffinity(0)) + except AttributeError: + num_jobs = os.cpu_count() + + nvcc_cuda_version = get_nvcc_cuda_version() + if nvcc_cuda_version >= Version("11.2"): + nvcc_threads = int(os.getenv("NVCC_THREADS", 8)) + num_jobs = max(1, round(num_jobs / (nvcc_threads / 4))) + else: + nvcc_threads = None + + return num_jobs, nvcc_threads + + # + # Perform cmake configuration for a single extension. + # + def configure(self, ext: CMakeExtension) -> None: + # If we've already configured using the CMakeLists.txt for + # this extension, exit early. + if ext.cmake_lists_dir in cmake_build_ext.did_config: + return + + cmake_build_ext.did_config[ext.cmake_lists_dir] = True + + # Select the build type. + # Note: optimization level + debug info are set by the build type + default_cfg = "Debug" if self.debug else "RelWithDebInfo" + cfg = os.getenv("CMAKE_BUILD_TYPE", default_cfg) + + # where .so files will be written, should be the same for all extensions + # that use the same CMakeLists.txt. 
+ outdir = os.path.abspath( + os.path.dirname(self.get_ext_fullpath(ext.name))) + + cmake_args = [ + '-DCMAKE_BUILD_TYPE={}'.format(cfg), + '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}'.format(outdir), + '-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY={}'.format(self.build_temp), + ] + + verbose = bool(int(os.getenv('VERBOSE', '0'))) + if verbose: + cmake_args += ['-DCMAKE_VERBOSE_MAKEFILE=ON'] + + if is_sccache_available(): + cmake_args += [ + '-DCMAKE_CXX_COMPILER_LAUNCHER=sccache', + '-DCMAKE_CUDA_COMPILER_LAUNCHER=sccache', + ] + elif is_ccache_available(): + cmake_args += [ + '-DCMAKE_CXX_COMPILER_LAUNCHER=ccache', + '-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache', + ] + + # Pass the python executable to cmake so it can find an exact + # match. + cmake_args += ['-DVLLM_PYTHON_EXECUTABLE={}'.format(sys.executable)] + + if _install_punica(): + cmake_args += ['-DVLLM_INSTALL_PUNICA_KERNELS=ON'] + + # + # Setup parallelism and build tool + # + num_jobs, nvcc_threads = self.compute_num_jobs() + + if nvcc_threads: + cmake_args += ['-DNVCC_THREADS={}'.format(nvcc_threads)] + + if is_ninja_available(): + build_tool = ['-G', 'Ninja'] + cmake_args += [ + '-DCMAKE_JOB_POOL_COMPILE:STRING=compile', + '-DCMAKE_JOB_POOLS:STRING=compile={}'.format(num_jobs), + ] + else: + # Default build tool to whatever cmake picks. + build_tool = [] + + subprocess.check_call( + ['cmake', ext.cmake_lists_dir, *build_tool, *cmake_args], + cwd=self.build_temp) + + def build_extensions(self) -> None: + # Ensure that CMake is present and working + try: + subprocess.check_output(['cmake', '--version']) + except OSError as e: + raise RuntimeError('Cannot find CMake executable') from e + + # Create build directory if it does not exist. + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + + # Build all the extensions + for ext in self.extensions: + self.configure(ext) + + ext_target_name = remove_prefix(ext.name, "vllm.") + num_jobs, _ = self.compute_num_jobs() + + build_args = [ + '--build', '.', '--target', ext_target_name, '-j', + str(num_jobs) + ] + + subprocess.check_call(['cmake', *build_args], cwd=self.build_temp) def _is_cuda() -> bool: @@ -55,26 +184,8 @@ def _is_neuron() -> bool: return torch_neuronx_installed -# Compiler flags. -CXX_FLAGS = ["-g", "-O2", "-std=c++17"] -# TODO(woosuk): Should we use -O3? -NVCC_FLAGS = ["-O2", "-std=c++17"] - -if _is_hip(): - if ROCM_HOME is None: - raise RuntimeError("Cannot find ROCM_HOME. " - "ROCm must be available to build the package.") - NVCC_FLAGS += ["-DUSE_ROCM"] - NVCC_FLAGS += ["-U__HIP_NO_HALF_CONVERSIONS__"] - NVCC_FLAGS += ["-U__HIP_NO_HALF_OPERATORS__"] - -if _is_cuda() and CUDA_HOME is None: - raise RuntimeError( - "Cannot find CUDA_HOME. CUDA must be available to build the package.") - -ABI = 1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0 -CXX_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"] -NVCC_FLAGS += [f"-D_GLIBCXX_USE_CXX11_ABI={ABI}"] +def _install_punica() -> bool: + return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0"))) def get_hipcc_rocm_version(): @@ -99,11 +210,6 @@ def get_hipcc_rocm_version(): return None -def glob(pattern: str): - root = Path(__name__).parent - return [str(p) for p in root.glob(pattern)] - - def get_neuronxcc_version(): import sysconfig site_dir = sysconfig.get_paths()["purelib"] @@ -123,12 +229,12 @@ def get_neuronxcc_version(): raise RuntimeError("Could not find HIP version in the output") -def get_nvcc_cuda_version(cuda_dir: str) -> Version: +def get_nvcc_cuda_version() -> Version: """Get the CUDA version from nvcc. 
Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py """ - nvcc_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], + nvcc_output = subprocess.check_output([CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True) output = nvcc_output.split() release_idx = output.index("release") + 1 @@ -136,250 +242,6 @@ def get_nvcc_cuda_version(cuda_dir: str) -> Version: return nvcc_cuda_version -def get_pytorch_rocm_arch() -> Set[str]: - """Get the cross section of Pytorch,and vllm supported gfx arches - - ROCM can get the supported gfx architectures in one of two ways - Either through the PYTORCH_ROCM_ARCH env var, or output from - rocm_agent_enumerator. - - In either case we can generate a list of supported arch's and - cross reference with VLLM's own ROCM_SUPPORTED_ARCHs. - """ - env_arch_list = os.environ.get("PYTORCH_ROCM_ARCH", None) - - # If we don't have PYTORCH_ROCM_ARCH specified pull the list from - # rocm_agent_enumerator - if env_arch_list is None: - command = "rocm_agent_enumerator" - env_arch_list = (subprocess.check_output( - [command]).decode('utf-8').strip().replace("\n", ";")) - arch_source_str = "rocm_agent_enumerator" - else: - arch_source_str = "PYTORCH_ROCM_ARCH env variable" - - # List are separated by ; or space. - pytorch_rocm_arch = set(env_arch_list.replace(" ", ";").split(";")) - - # Filter out the invalid architectures and print a warning. - arch_list = pytorch_rocm_arch.intersection(ROCM_SUPPORTED_ARCHS) - - # If none of the specified architectures are valid, raise an error. - if not arch_list: - raise RuntimeError( - f"None of the ROCM architectures in {arch_source_str} " - f"({env_arch_list}) is supported. " - f"Supported ROCM architectures are: {ROCM_SUPPORTED_ARCHS}.") - invalid_arch_list = pytorch_rocm_arch - ROCM_SUPPORTED_ARCHS - if invalid_arch_list: - warnings.warn( - f"Unsupported ROCM architectures ({invalid_arch_list}) are " - f"excluded from the {arch_source_str} output " - f"({env_arch_list}). Supported ROCM architectures are: " - f"{ROCM_SUPPORTED_ARCHS}.", - stacklevel=2) - return arch_list - - -def get_torch_arch_list() -> Set[str]: - # TORCH_CUDA_ARCH_LIST can have one or more architectures, - # e.g. "8.0" or "7.5,8.0,8.6+PTX". Here, the "8.6+PTX" option asks the - # compiler to additionally include PTX code that can be runtime-compiled - # and executed on the 8.6 or newer architectures. While the PTX code will - # not give the best performance on the newer architectures, it provides - # forward compatibility. - env_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) - if env_arch_list is None: - return set() - - # List are separated by ; or space. - torch_arch_list = set(env_arch_list.replace(" ", ";").split(";")) - if not torch_arch_list: - return set() - - # Filter out the invalid architectures and print a warning. - valid_archs = NVIDIA_SUPPORTED_ARCHS.union( - {s + "+PTX" - for s in NVIDIA_SUPPORTED_ARCHS}) - arch_list = torch_arch_list.intersection(valid_archs) - # If none of the specified architectures are valid, raise an error. - if not arch_list: - raise RuntimeError( - "None of the CUDA architectures in `TORCH_CUDA_ARCH_LIST` env " - f"variable ({env_arch_list}) is supported. " - f"Supported CUDA architectures are: {valid_archs}.") - invalid_arch_list = torch_arch_list - valid_archs - if invalid_arch_list: - warnings.warn( - f"Unsupported CUDA architectures ({invalid_arch_list}) are " - "excluded from the `TORCH_CUDA_ARCH_LIST` env variable " - f"({env_arch_list}). 
Supported CUDA architectures are: " - f"{valid_archs}.", - stacklevel=2) - return arch_list - - -if _is_hip(): - rocm_arches = get_pytorch_rocm_arch() - NVCC_FLAGS += ["--offload-arch=" + arch for arch in rocm_arches] -else: - # First, check the TORCH_CUDA_ARCH_LIST environment variable. - compute_capabilities = get_torch_arch_list() - -if _is_cuda() and not compute_capabilities: - # If TORCH_CUDA_ARCH_LIST is not defined or empty, target all available - # GPUs on the current machine. - device_count = torch.cuda.device_count() - for i in range(device_count): - major, minor = torch.cuda.get_device_capability(i) - if major < 7: - raise RuntimeError( - "GPUs with compute capability below 7.0 are not supported.") - compute_capabilities.add(f"{major}.{minor}") - -ext_modules = [] - -if _is_cuda(): - nvcc_cuda_version = get_nvcc_cuda_version(CUDA_HOME) - if not compute_capabilities: - # If no GPU is specified nor available, add all supported architectures - # based on the NVCC CUDA version. - compute_capabilities = NVIDIA_SUPPORTED_ARCHS.copy() - if nvcc_cuda_version < Version("11.1"): - compute_capabilities.remove("8.6") - if nvcc_cuda_version < Version("11.8"): - compute_capabilities.remove("8.9") - compute_capabilities.remove("9.0") - # Validate the NVCC CUDA version. - if nvcc_cuda_version < Version("11.0"): - raise RuntimeError( - "CUDA 11.0 or higher is required to build the package.") - if (nvcc_cuda_version < Version("11.1") - and any(cc.startswith("8.6") for cc in compute_capabilities)): - raise RuntimeError( - "CUDA 11.1 or higher is required for compute capability 8.6.") - if nvcc_cuda_version < Version("11.8"): - if any(cc.startswith("8.9") for cc in compute_capabilities): - # CUDA 11.8 is required to generate the code targeting compute - # capability 8.9. However, GPUs with compute capability 8.9 can - # also run the code generated by the previous versions of CUDA 11 - # and targeting compute capability 8.0. Therefore, if CUDA 11.8 - # is not available, we target compute capability 8.0 instead of 8.9. - warnings.warn( - "CUDA 11.8 or higher is required for compute capability 8.9. " - "Targeting compute capability 8.0 instead.", - stacklevel=2) - compute_capabilities = set(cc for cc in compute_capabilities - if not cc.startswith("8.9")) - compute_capabilities.add("8.0+PTX") - if any(cc.startswith("9.0") for cc in compute_capabilities): - raise RuntimeError( - "CUDA 11.8 or higher is required for compute capability 9.0.") - - NVCC_FLAGS_PUNICA = NVCC_FLAGS.copy() - - # Add target compute capabilities to NVCC flags. - for capability in compute_capabilities: - num = capability[0] + capability[2] - NVCC_FLAGS += ["-gencode", f"arch=compute_{num},code=sm_{num}"] - if capability.endswith("+PTX"): - NVCC_FLAGS += [ - "-gencode", f"arch=compute_{num},code=compute_{num}" - ] - if int(capability[0]) >= 8: - NVCC_FLAGS_PUNICA += [ - "-gencode", f"arch=compute_{num},code=sm_{num}" - ] - if capability.endswith("+PTX"): - NVCC_FLAGS_PUNICA += [ - "-gencode", f"arch=compute_{num},code=compute_{num}" - ] - - # Use NVCC threads to parallelize the build. 
- if nvcc_cuda_version >= Version("11.2"): - nvcc_threads = int(os.getenv("NVCC_THREADS", 8)) - num_threads = min(os.cpu_count(), nvcc_threads) - NVCC_FLAGS += ["--threads", str(num_threads)] - - if nvcc_cuda_version >= Version("11.8"): - NVCC_FLAGS += ["-DENABLE_FP8_E5M2"] - - # changes for punica kernels - NVCC_FLAGS += torch_cpp_ext.COMMON_NVCC_FLAGS - REMOVE_NVCC_FLAGS = [ - '-D__CUDA_NO_HALF_OPERATORS__', - '-D__CUDA_NO_HALF_CONVERSIONS__', - '-D__CUDA_NO_BFLOAT16_CONVERSIONS__', - '-D__CUDA_NO_HALF2_OPERATORS__', - ] - for flag in REMOVE_NVCC_FLAGS: - with contextlib.suppress(ValueError): - torch_cpp_ext.COMMON_NVCC_FLAGS.remove(flag) - - install_punica = bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0"))) - device_count = torch.cuda.device_count() - for i in range(device_count): - major, minor = torch.cuda.get_device_capability(i) - if major < 8: - install_punica = False - break - if install_punica: - ext_modules.append( - CUDAExtension( - name="vllm._punica_C", - sources=["csrc/punica/punica_ops.cc"] + - glob("csrc/punica/bgmv/*.cu"), - extra_compile_args={ - "cxx": CXX_FLAGS, - "nvcc": NVCC_FLAGS_PUNICA, - }, - )) -elif _is_neuron(): - neuronxcc_version = get_neuronxcc_version() - -vllm_extension_sources = [ - "csrc/cache_kernels.cu", - "csrc/attention/attention_kernels.cu", - "csrc/pos_encoding_kernels.cu", - "csrc/activation_kernels.cu", - "csrc/layernorm_kernels.cu", - "csrc/quantization/squeezellm/quant_cuda_kernel.cu", - "csrc/quantization/gptq/q_gemm.cu", - "csrc/cuda_utils_kernels.cu", - "csrc/moe_align_block_size_kernels.cu", - "csrc/pybind.cpp", -] - -if _is_cuda(): - vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu") - vllm_extension_sources.append( - "csrc/quantization/marlin/marlin_cuda_kernel.cu") - vllm_extension_sources.append("csrc/custom_all_reduce.cu") - - # Add MoE kernels. 
- ext_modules.append( - CUDAExtension( - name="vllm._moe_C", - sources=glob("csrc/moe/*.cu") + glob("csrc/moe/*.cpp"), - extra_compile_args={ - "cxx": CXX_FLAGS, - "nvcc": NVCC_FLAGS, - }, - )) - -if not _is_neuron(): - vllm_extension = CUDAExtension( - name="vllm._C", - sources=vllm_extension_sources, - extra_compile_args={ - "cxx": CXX_FLAGS, - "nvcc": NVCC_FLAGS, - }, - libraries=["cuda"] if _is_cuda() else [], - ) - ext_modules.append(vllm_extension) - - def get_path(*filepath) -> str: return os.path.join(ROOT_DIR, *filepath) @@ -401,7 +263,7 @@ def get_vllm_version() -> str: version = find_version(get_path("vllm", "__init__.py")) if _is_cuda(): - cuda_version = str(nvcc_cuda_version) + cuda_version = str(get_nvcc_cuda_version()) if cuda_version != MAIN_CUDA_VERSION: cuda_version_str = cuda_version.replace(".", "")[:3] version += f"+cu{cuda_version_str}" @@ -413,7 +275,7 @@ def get_vllm_version() -> str: version += f"+rocm{rocm_version_str}" elif _is_neuron(): # Get the Neuron version - neuron_version = str(neuronxcc_version) + neuron_version = str(get_neuronxcc_version()) if neuron_version != MAIN_CUDA_VERSION: neuron_version_str = neuron_version.replace(".", "")[:3] version += f"+neuron{neuron_version_str}" @@ -437,7 +299,7 @@ def get_requirements() -> List[str]: if _is_cuda(): with open(get_path("requirements.txt")) as f: requirements = f.read().strip().split("\n") - if nvcc_cuda_version <= Version("11.8"): + if get_nvcc_cuda_version() <= Version("11.8"): # replace cupy-cuda12x with cupy-cuda11x for cuda 11.x for i in range(len(requirements)): if requirements[i].startswith("cupy-cuda12x"): @@ -456,14 +318,24 @@ def get_requirements() -> List[str]: return requirements +ext_modules = [] + +if _is_cuda(): + ext_modules.append(CMakeExtension(name="vllm._moe_C")) + + if _install_punica(): + ext_modules.append(CMakeExtension(name="vllm._punica_C")) + +if not _is_neuron(): + ext_modules.append(CMakeExtension(name="vllm._C")) + package_data = { "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"] } if os.environ.get("VLLM_USE_PRECOMPILED"): - ext_modules = [] package_data["vllm"].append("*.so") -setuptools.setup( +setup( name="vllm", version=get_vllm_version(), author="vLLM Team", @@ -485,11 +357,11 @@ def get_requirements() -> List[str]: "License :: OSI Approved :: Apache Software License", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], - packages=setuptools.find_packages(exclude=("benchmarks", "csrc", "docs", - "examples", "tests")), + packages=find_packages(exclude=("benchmarks", "csrc", "docs", "examples", + "tests")), python_requires=">=3.8", install_requires=get_requirements(), ext_modules=ext_modules, - cmdclass={"build_ext": BuildExtension} if not _is_neuron() else {}, + cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {}, package_data=package_data, ) From 49eedea373043ee9d1b11b81b6c5b3bc24af5b77 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Mon, 18 Mar 2024 15:56:40 -0700 Subject: [PATCH 133/196] [Core] Zero-copy asdict for InputMetadata (#3475) --- vllm/model_executor/input_metadata.py | 13 +++++++++++-- vllm/worker/model_runner.py | 3 +-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py index ebba0ba0a261a..01bba70ac10a8 100644 --- a/vllm/model_executor/input_metadata.py +++ b/vllm/model_executor/input_metadata.py @@ -1,5 +1,5 @@ -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, fields 
+from typing import Optional, Any, Dict import torch @@ -31,3 +31,12 @@ class InputMetadata: def __post_init__(self): # will not appear in the __repr__ and __init__ self.attn_bias = None + + def asdict_zerocopy(self) -> Dict[str, Any]: + """Similar to dataclasses.asdict, but avoids deepcopying.""" + # Note that if we add dataclasses as fields, they will need + # similar handling. + return { + field.name: getattr(self, field.name) + for field in fields(self) + } diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 1ef783da6d08e..27213887ed265 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1,5 +1,4 @@ import contextlib -import dataclasses import time from typing import Dict, List, Optional, Tuple, Set, Union @@ -527,7 +526,7 @@ def prepare_input_tensors( "lora_requests": lora_requests, "lora_mapping": lora_mapping, } - metadata_dict.update(dataclasses.asdict(input_metadata)) + metadata_dict.update(input_metadata.asdict_zerocopy()) broadcast_tensor_dict(metadata_dict, src=0) else: metadata_dict = broadcast_tensor_dict(src=0) From b30880a7626cfd4b3f593c995118513674a98880 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Mon, 18 Mar 2024 15:58:38 -0700 Subject: [PATCH 134/196] [Misc] Update README for the Third vLLM Meetup (#3479) --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 064faa550f267..f57c3f7862ed1 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,15 @@ Easy, fast, and cheap LLM serving for everyone --- +**The Third vLLM Bay Area Meetup (April 2nd 6pm-8:30pm PT)** + +We are thrilled to announce our third vLLM Meetup! +The vLLM team will share recent updates and roadmap. +We will also have vLLM collaborators from Roblox coming up to the stage to discuss their experience in deploying LLMs with vLLM. +Please register [here](https://robloxandvllmmeetup2024.splashthat.com/) and join us! + +--- + *Latest News* 🔥 - [2024/01] We hosted [the second vLLM meetup](https://lu.ma/ygxbpzhl) in SF! Please find the meetup slides [here](https://docs.google.com/presentation/d/12mI2sKABnUw5RBWXDYY-HtHth4iMSNcEoQ10jDQbxgA/edit?usp=sharing). - [2024/01] Added ROCm 6.0 support to vLLM. 
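The `asdict_zerocopy` helper added above replaces `dataclasses.asdict`, which recursively deep-copies every field value (including tensors) before the metadata is broadcast. A minimal standalone sketch of the difference, using a hypothetical `Meta` dataclass rather than vLLM's actual `InputMetadata`:

```python
# Sketch only: contrasts dataclasses.asdict (deep copy) with a shallow,
# field-wise dict. The `Meta` dataclass here is hypothetical.
import dataclasses
from dataclasses import dataclass, fields
from typing import Any, Dict

import torch


@dataclass
class Meta:
    prompt_lens: list
    slot_mapping: torch.Tensor


def asdict_zerocopy(obj) -> Dict[str, Any]:
    # Shallow: the returned dict references the original field objects.
    return {field.name: getattr(obj, field.name) for field in fields(obj)}


m = Meta(prompt_lens=[3, 5], slot_mapping=torch.zeros(8))

shallow = asdict_zerocopy(m)
assert shallow["slot_mapping"] is m.slot_mapping      # same tensor object

deep = dataclasses.asdict(m)
assert deep["slot_mapping"] is not m.slot_mapping     # tensor was deep-copied
```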
From b37cdce2b1125ac06829c2606be1e26d75b5a505 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Mon, 18 Mar 2024 17:14:26 -0700 Subject: [PATCH 135/196] [Core] Cache some utils (#3474) --- vllm/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/utils.py b/vllm/utils.py index d4a8c962c3bfc..729a4332af967 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -4,6 +4,7 @@ import subprocess import uuid import gc +from functools import cache from platform import uname from typing import List, Tuple, Union from packaging.version import parse, Version @@ -120,6 +121,7 @@ def is_hip() -> bool: return torch.version.hip is not None +@cache def is_neuron() -> bool: try: import transformers_neuronx @@ -128,6 +130,7 @@ def is_neuron() -> bool: return transformers_neuronx is not None +@cache def get_max_shared_memory_bytes(gpu: int = 0) -> int: """Returns the maximum shared memory per thread block in bytes.""" # NOTE: This import statement should be executed lazily since @@ -151,6 +154,7 @@ def random_uuid() -> str: return str(uuid.uuid4().hex) +@cache def in_wsl() -> bool: # Reference: https://github.com/microsoft/WSL/issues/4071 return "microsoft" in " ".join(uname()).lower() @@ -225,6 +229,7 @@ def set_cuda_visible_devices(device_ids: List[int]) -> None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, device_ids)) +@cache def get_nvcc_cuda_version() -> Optional[Version]: cuda_home = os.environ.get('CUDA_HOME') if not cuda_home: From 6a9c583e73c75c8eab10a9c607cb096750b751a0 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Mon, 18 Mar 2024 21:06:23 -0700 Subject: [PATCH 136/196] [Core] print error before deadlock (#3459) --- vllm/engine/ray_utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/vllm/engine/ray_utils.py b/vllm/engine/ray_utils.py index 742f3dc575190..27414f085b45a 100644 --- a/vllm/engine/ray_utils.py +++ b/vllm/engine/ray_utils.py @@ -33,8 +33,17 @@ def __getattr__(self, name): return getattr(self.worker, name) def execute_method(self, method, *args, **kwargs): - executor = getattr(self, method) - return executor(*args, **kwargs) + try: + executor = getattr(self, method) + return executor(*args, **kwargs) + except Exception as e: + # exceptions in ray worker may cause deadlock + # see https://github.com/vllm-project/vllm/issues/3455 + # print the error and inform the user to solve the error + msg = (f"Error executing method {method}. 
" + "This might cause deadlock in distributed execution.") + logger.exception(msg) + raise e def get_node_ip(self) -> str: return get_ip() From ef65dcfa6f5820ce9e4a2411e9be18586f6fd467 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 18 Mar 2024 22:05:34 -0700 Subject: [PATCH 137/196] [Doc] Add docs about OpenAI compatible server (#3288) --- docs/requirements-docs.txt | 7 + docs/source/conf.py | 4 +- docs/source/dev/sampling_params.rst | 4 + docs/source/index.rst | 10 +- docs/source/models/lora.rst | 2 +- docs/source/serving/integrations.rst | 11 ++ .../serving/openai_compatible_server.md | 114 ++++++++++++ vllm/entrypoints/openai/api_server.py | 108 +----------- vllm/entrypoints/openai/cli_args.py | 118 +++++++++++++ vllm/entrypoints/openai/protocol.py | 166 +++++++++++++----- 10 files changed, 383 insertions(+), 161 deletions(-) create mode 100644 docs/source/dev/sampling_params.rst create mode 100644 docs/source/serving/integrations.rst create mode 100644 docs/source/serving/openai_compatible_server.md create mode 100644 vllm/entrypoints/openai/cli_args.py diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 95e54bd151850..96749b9327d7a 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -1,3 +1,10 @@ sphinx == 6.2.1 sphinx-book-theme == 1.0.1 sphinx-copybutton == 0.5.2 +myst-parser == 2.0.0 +sphinx-argparse + +# packages to install to build the documentation +pydantic +-f https://download.pytorch.org/whl/cpu +torch \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 61d24e1612128..2ca0d642b7463 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -22,7 +22,7 @@ # -- Project information ----------------------------------------------------- project = 'vLLM' -copyright = '2023, vLLM Team' +copyright = '2024, vLLM Team' author = 'the vLLM Team' # -- General configuration --------------------------------------------------- @@ -37,6 +37,8 @@ "sphinx_copybutton", "sphinx.ext.autodoc", "sphinx.ext.autosummary", + "myst_parser", + "sphinxarg.ext", ] # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/dev/sampling_params.rst b/docs/source/dev/sampling_params.rst new file mode 100644 index 0000000000000..844859b3ec1f0 --- /dev/null +++ b/docs/source/dev/sampling_params.rst @@ -0,0 +1,4 @@ +Sampling Params +=============== + +.. automodule:: vllm.sampling_params.SamplingParams \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 65bfbbabf8be1..72081588b1bcf 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,14 +69,11 @@ Documentation :maxdepth: 1 :caption: Serving - serving/distributed_serving - serving/run_on_sky - serving/deploying_with_kserve - serving/deploying_with_triton - serving/deploying_with_bentoml + serving/openai_compatible_server serving/deploying_with_docker - serving/serving_with_langchain + serving/distributed_serving serving/metrics + serving/integrations .. toctree:: :maxdepth: 1 @@ -98,6 +95,7 @@ Documentation :maxdepth: 2 :caption: Developer Documentation + dev/sampling_params dev/engine/engine_index dev/kernel/paged_attention diff --git a/docs/source/models/lora.rst b/docs/source/models/lora.rst index f05fafe9f8279..2278640481a91 100644 --- a/docs/source/models/lora.rst +++ b/docs/source/models/lora.rst @@ -90,7 +90,7 @@ Requests can specify the LoRA adapter as if it were any other model via the ``mo processed according to the server-wide LoRA configuration (i.e. 
in parallel with base model requests, and potentially other LoRA adapter requests if they were provided and ``max_loras`` is set high enough). -The following is an example request +The following is an example request .. code-block:: bash diff --git a/docs/source/serving/integrations.rst b/docs/source/serving/integrations.rst new file mode 100644 index 0000000000000..93872397913e3 --- /dev/null +++ b/docs/source/serving/integrations.rst @@ -0,0 +1,11 @@ +Integrations +------------ + +.. toctree:: + :maxdepth: 1 + + run_on_sky + deploying_with_kserve + deploying_with_triton + deploying_with_bentoml + serving_with_langchain diff --git a/docs/source/serving/openai_compatible_server.md b/docs/source/serving/openai_compatible_server.md new file mode 100644 index 0000000000000..032fe5d03bd52 --- /dev/null +++ b/docs/source/serving/openai_compatible_server.md @@ -0,0 +1,114 @@ +# OpenAI Compatible Server + +vLLM provides an HTTP server that implements OpenAI's [Completions](https://platform.openai.com/docs/api-reference/completions) and [Chat](https://platform.openai.com/docs/api-reference/chat) API. + +You can start the server using Python, or using [Docker](deploying_with_docker.rst): +```bash +python -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-hf --dtype float32 --api-key token-abc123 +``` + +To call the server, you can use the official OpenAI Python client library, or any other HTTP client. +```python +from openai import OpenAI +client = OpenAI( + base_url="http://localhost:8000/v1", + api_key="token-abc123", +) + +completion = client.chat.completions.create( + model="meta-llama/Llama-2-7b-hf", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"} + ] +) + +print(completion.choices[0].message) +``` + +## API Reference +Please see the [OpenAI API Reference](https://platform.openai.com/docs/api-reference) for more information on the API. We support all parameters except: +- Chat: `tools`, and `tool_choice`. +- Completions: `suffix`. + +## Extra Parameters +vLLM supports a set of parameters that are not part of the OpenAI API. +In order to use them, you can pass them as extra parameters in the OpenAI client. +Or directly merge them into the JSON payload if you are using HTTP call directly. + +```python +completion = client.chat.completions.create( + model="meta-llama/Llama-2-7b-hf", + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"} + ], + extra_body={ + "guided_choice": ["positive", "negative"] + } +) +``` + +### Extra Parameters for Chat API +The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. + +```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-chat-completion-sampling-params +:end-before: end-chat-completion-sampling-params +``` + +The following extra parameters are supported: + +```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-chat-completion-extra-params +:end-before: end-chat-completion-extra-params +``` + +### Extra Parameters for Completions API +The following [sampling parameters (click through to see documentation)](../dev/sampling_params.rst) are supported. 
+ +```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-completion-sampling-params +:end-before: end-completion-sampling-params +``` + +The following extra parameters are supported: + +```{literalinclude} ../../../vllm/entrypoints/openai/protocol.py +:language: python +:start-after: begin-completion-extra-params +:end-before: end-completion-extra-params +``` + +## Chat Template + +In order for the language model to support chat protocol, vLLM requires the model to include +a chat template in its tokenizer configuration. The chat template is a Jinja2 template that +specifies how are roles, messages, and other chat-specific tokens are encoded in the input. + +An example chat template for `meta-llama/Llama-2-7b-chat-hf` can be found [here](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/09bd0f49e16738cdfaa6e615203e126038736eb0/tokenizer_config.json#L12) + +Some models do not provide a chat template even though they are instruction/chat fine-tuned. For those model, +you can manually specify their chat template in the `--chat-template` parameter with the file path to the chat +template, or the template in string form. Without a chat template, the server will not be able to process chat +and all chat requests will error. + +```bash +python -m vllm.entrypoints.openai.api_server \ + --model ... \ + --chat-template ./path-to-chat-template.jinja +``` + +vLLM community provides a set of chat templates for popular models. You can find them in the examples +directory [here](https://github.com/vllm-project/vllm/tree/main/examples/) + +## Command line arguments for the server + +```{argparse} +:module: vllm.entrypoints.openai.cli_args +:func: make_arg_parser +:prog: vllm-openai-server +``` \ No newline at end of file diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index e0626ca4e9da1..a0685a4d38fbe 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1,11 +1,8 @@ -import argparse import asyncio -import json from contextlib import asynccontextmanager import os import importlib import inspect -import ssl from prometheus_client import make_asgi_app import fastapi @@ -23,9 +20,9 @@ ChatCompletionRequest, ErrorResponse) from vllm.logger import init_logger +from vllm.entrypoints.openai.cli_args import make_arg_parser from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion -from vllm.entrypoints.openai.serving_engine import LoRA TIMEOUT_KEEP_ALIVE = 5 # seconds @@ -51,109 +48,8 @@ async def _force_log(): app = fastapi.FastAPI(lifespan=lifespan) -class LoRAParserAction(argparse.Action): - - def __call__(self, parser, namespace, values, option_string=None): - lora_list = [] - for item in values: - name, path = item.split('=') - lora_list.append(LoRA(name, path)) - setattr(namespace, self.dest, lora_list) - - def parse_args(): - parser = argparse.ArgumentParser( - description="vLLM OpenAI-Compatible RESTful API server.") - parser.add_argument("--host", type=str, default=None, help="host name") - parser.add_argument("--port", type=int, default=8000, help="port number") - parser.add_argument( - "--uvicorn-log-level", - type=str, - default="info", - choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'], - help="log level for uvicorn") - parser.add_argument("--allow-credentials", - action="store_true", - help="allow credentials") - 
parser.add_argument("--allowed-origins", - type=json.loads, - default=["*"], - help="allowed origins") - parser.add_argument("--allowed-methods", - type=json.loads, - default=["*"], - help="allowed methods") - parser.add_argument("--allowed-headers", - type=json.loads, - default=["*"], - help="allowed headers") - parser.add_argument("--api-key", - type=str, - default=None, - help="If provided, the server will require this key " - "to be presented in the header.") - parser.add_argument("--served-model-name", - type=str, - default=None, - help="The model name used in the API. If not " - "specified, the model name will be the same as " - "the huggingface name.") - parser.add_argument( - "--lora-modules", - type=str, - default=None, - nargs='+', - action=LoRAParserAction, - help="LoRA module configurations in the format name=path. " - "Multiple modules can be specified.") - parser.add_argument("--chat-template", - type=str, - default=None, - help="The file path to the chat template, " - "or the template in single-line form " - "for the specified model") - parser.add_argument("--response-role", - type=str, - default="assistant", - help="The role name to return if " - "`request.add_generation_prompt=true`.") - parser.add_argument("--ssl-keyfile", - type=str, - default=None, - help="The file path to the SSL key file") - parser.add_argument("--ssl-certfile", - type=str, - default=None, - help="The file path to the SSL cert file") - parser.add_argument("--ssl-ca-certs", - type=str, - default=None, - help="The CA certificates file") - parser.add_argument( - "--ssl-cert-reqs", - type=int, - default=int(ssl.CERT_NONE), - help="Whether client certificate is required (see stdlib ssl module's)" - ) - parser.add_argument( - "--root-path", - type=str, - default=None, - help="FastAPI root_path when app is behind a path based routing proxy") - parser.add_argument( - "--middleware", - type=str, - action="append", - default=[], - help="Additional ASGI middleware to apply to the app. " - "We accept multiple --middleware arguments. " - "The value should be an import path. " - "If a function is provided, vLLM will add it to the server " - "using @app.middleware('http'). " - "If a class is provided, vLLM will add it to the server " - "using app.add_middleware(). ") - - parser = AsyncEngineArgs.add_cli_args(parser) + parser = make_arg_parser() return parser.parse_args() diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py new file mode 100644 index 0000000000000..cc71931b97955 --- /dev/null +++ b/vllm/entrypoints/openai/cli_args.py @@ -0,0 +1,118 @@ +""" +This file contains the command line arguments for the vLLM's +OpenAI-compatible server. It is kept in a separate file for documentation +purposes. 
+""" + +import argparse +import json +import ssl + +from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.entrypoints.openai.serving_engine import LoRA + + +class LoRAParserAction(argparse.Action): + + def __call__(self, parser, namespace, values, option_string=None): + lora_list = [] + for item in values: + name, path = item.split('=') + lora_list.append(LoRA(name, path)) + setattr(namespace, self.dest, lora_list) + + +def make_arg_parser(): + parser = argparse.ArgumentParser( + description="vLLM OpenAI-Compatible RESTful API server.") + parser.add_argument("--host", type=str, default=None, help="host name") + parser.add_argument("--port", type=int, default=8000, help="port number") + parser.add_argument( + "--uvicorn-log-level", + type=str, + default="info", + choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'], + help="log level for uvicorn") + parser.add_argument("--allow-credentials", + action="store_true", + help="allow credentials") + parser.add_argument("--allowed-origins", + type=json.loads, + default=["*"], + help="allowed origins") + parser.add_argument("--allowed-methods", + type=json.loads, + default=["*"], + help="allowed methods") + parser.add_argument("--allowed-headers", + type=json.loads, + default=["*"], + help="allowed headers") + parser.add_argument("--api-key", + type=str, + default=None, + help="If provided, the server will require this key " + "to be presented in the header.") + parser.add_argument("--served-model-name", + type=str, + default=None, + help="The model name used in the API. If not " + "specified, the model name will be the same as " + "the huggingface name.") + parser.add_argument( + "--lora-modules", + type=str, + default=None, + nargs='+', + action=LoRAParserAction, + help="LoRA module configurations in the format name=path. " + "Multiple modules can be specified.") + parser.add_argument("--chat-template", + type=str, + default=None, + help="The file path to the chat template, " + "or the template in single-line form " + "for the specified model") + parser.add_argument("--response-role", + type=str, + default="assistant", + help="The role name to return if " + "`request.add_generation_prompt=true`.") + parser.add_argument("--ssl-keyfile", + type=str, + default=None, + help="The file path to the SSL key file") + parser.add_argument("--ssl-certfile", + type=str, + default=None, + help="The file path to the SSL cert file") + parser.add_argument("--ssl-ca-certs", + type=str, + default=None, + help="The CA certificates file") + parser.add_argument( + "--ssl-cert-reqs", + type=int, + default=int(ssl.CERT_NONE), + help="Whether client certificate is required (see stdlib ssl module's)" + ) + parser.add_argument( + "--root-path", + type=str, + default=None, + help="FastAPI root_path when app is behind a path based routing proxy") + parser.add_argument( + "--middleware", + type=str, + action="append", + default=[], + help="Additional ASGI middleware to apply to the app. " + "We accept multiple --middleware arguments. " + "The value should be an import path. " + "If a function is provided, vLLM will add it to the server " + "using @app.middleware('http'). " + "If a class is provided, vLLM will add it to the server " + "using app.add_middleware(). 
") + + parser = AsyncEngineArgs.add_cli_args(parser) + return parser diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 9421880411611..1f089d524fd03 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -61,41 +61,80 @@ class ResponseFormat(BaseModel): class ChatCompletionRequest(BaseModel): - model: str + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/chat/create messages: List[Dict[str, str]] - temperature: Optional[float] = 0.7 - top_p: Optional[float] = 1.0 - n: Optional[int] = 1 + model: str + frequency_penalty: Optional[float] = 0.0 + logit_bias: Optional[Dict[str, float]] = None + logprobs: Optional[bool] = False + top_logprobs: Optional[int] = None max_tokens: Optional[int] = None + n: Optional[int] = 1 + presence_penalty: Optional[float] = 0.0 + response_format: Optional[ResponseFormat] = None seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = Field(default_factory=list) stream: Optional[bool] = False - logprobs: Optional[bool] = False - top_logprobs: Optional[int] = None - presence_penalty: Optional[float] = 0.0 - frequency_penalty: Optional[float] = 0.0 - logit_bias: Optional[Dict[str, float]] = None + temperature: Optional[float] = 0.7 + top_p: Optional[float] = 1.0 user: Optional[str] = None - # Additional parameters supported by vLLM + + # doc: begin-chat-completion-sampling-params best_of: Optional[int] = None - top_k: Optional[int] = -1 - ignore_eos: Optional[bool] = False use_beam_search: Optional[bool] = False + top_k: Optional[int] = -1 + min_p: Optional[float] = 0.0 + repetition_penalty: Optional[float] = 1.0 + length_penalty: Optional[float] = 1.0 early_stopping: Optional[bool] = False + ignore_eos: Optional[bool] = False stop_token_ids: Optional[List[int]] = Field(default_factory=list) skip_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True - add_generation_prompt: Optional[bool] = True - echo: Optional[bool] = False - repetition_penalty: Optional[float] = 1.0 - min_p: Optional[float] = 0.0 - include_stop_str_in_output: Optional[bool] = False - length_penalty: Optional[float] = 1.0 - guided_json: Optional[Union[str, dict, BaseModel]] = None - guided_regex: Optional[str] = None - guided_choice: Optional[List[str]] = None - guided_grammar: Optional[str] = None - response_format: Optional[ResponseFormat] = None + # doc: end-chat-completion-sampling-params + + # doc: begin-chat-completion-extra-params + echo: Optional[bool] = Field( + default=False, + description=( + "If true, the new message will be prepended with the last message " + "if they belong to the same role."), + ) + add_generation_prompt: Optional[bool] = Field( + default=True, + description= + ("If true, the generation prompt will be added to the chat template. " + "This is a parameter used by chat template in tokenizer config of the " + "model."), + ) + include_stop_str_in_output: Optional[bool] = Field( + default=False, + description=( + "Whether to include the stop string in the output. 
" + "This is only applied when the stop or stop_token_ids is set."), + ) + guided_json: Optional[Union[str, dict, BaseModel]] = Field( + default=None, + description=("If specified, the output will follow the JSON schema."), + ) + guided_regex: Optional[str] = Field( + default=None, + description=( + "If specified, the output will follow the regex pattern."), + ) + guided_choice: Optional[List[str]] = Field( + default=None, + description=( + "If specified, the output will be exactly one of the choices."), + ) + guided_grammar: Optional[str] = Field( + default=None, + description=( + "If specified, the output will follow the context free grammar."), + ) + + # doc: end-chat-completion-extra-params def to_sampling_params(self) -> SamplingParams: if self.logprobs and not self.top_logprobs: @@ -157,41 +196,74 @@ def check_guided_decoding_count(cls, data): class CompletionRequest(BaseModel): + # Ordered by official OpenAI API documentation + # https://platform.openai.com/docs/api-reference/completions/create model: str - # a string, array of strings, array of tokens, or array of token arrays prompt: Union[List[int], List[List[int]], str, List[str]] - suffix: Optional[str] = None - max_tokens: Optional[int] = 16 - temperature: Optional[float] = 1.0 - top_p: Optional[float] = 1.0 - n: Optional[int] = 1 - stream: Optional[bool] = False - logprobs: Optional[int] = None + best_of: Optional[int] = None echo: Optional[bool] = False - stop: Optional[Union[str, List[str]]] = Field(default_factory=list) - seed: Optional[int] = None - presence_penalty: Optional[float] = 0.0 frequency_penalty: Optional[float] = 0.0 - best_of: Optional[int] = None logit_bias: Optional[Dict[str, float]] = None + logprobs: Optional[int] = None + max_tokens: Optional[int] = 16 + n: Optional[int] = 1 + presence_penalty: Optional[float] = 0.0 + seed: Optional[int] = None + stop: Optional[Union[str, List[str]]] = Field(default_factory=list) + stream: Optional[bool] = False + suffix: Optional[str] = None + temperature: Optional[float] = 1.0 + top_p: Optional[float] = 1.0 user: Optional[str] = None - # Additional parameters supported by vLLM - top_k: Optional[int] = -1 - ignore_eos: Optional[bool] = False + + # doc: begin-completion-sampling-params use_beam_search: Optional[bool] = False + top_k: Optional[int] = -1 + min_p: Optional[float] = 0.0 + repetition_penalty: Optional[float] = 1.0 + length_penalty: Optional[float] = 1.0 early_stopping: Optional[bool] = False stop_token_ids: Optional[List[int]] = Field(default_factory=list) + ignore_eos: Optional[bool] = False skip_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True - repetition_penalty: Optional[float] = 1.0 - min_p: Optional[float] = 0.0 - include_stop_str_in_output: Optional[bool] = False - length_penalty: Optional[float] = 1.0 - guided_json: Optional[Union[str, dict, BaseModel]] = None - guided_regex: Optional[str] = None - guided_choice: Optional[List[str]] = None - guided_grammar: Optional[str] = None - response_format: Optional[ResponseFormat] = None + # doc: end-completion-sampling-params + + # doc: begin-completion-extra-params + include_stop_str_in_output: Optional[bool] = Field( + default=False, + description=( + "Whether to include the stop string in the output. " + "This is only applied when the stop or stop_token_ids is set."), + ) + response_format: Optional[ResponseFormat] = Field( + default=None, + description= + ("Similar to chat completion, this parameter specifies the format of " + "output. 
Only {'type': 'json_object'} or {'type': 'text' } is " + "supported."), + ) + guided_json: Optional[Union[str, dict, BaseModel]] = Field( + default=None, + description=("If specified, the output will follow the JSON schema."), + ) + guided_regex: Optional[str] = Field( + default=None, + description=( + "If specified, the output will follow the regex pattern."), + ) + guided_choice: Optional[List[str]] = Field( + default=None, + description=( + "If specified, the output will be exactly one of the choices."), + ) + guided_grammar: Optional[str] = Field( + default=None, + description=( + "If specified, the output will follow the context free grammar."), + ) + + # doc: end-completion-extra-params def to_sampling_params(self): echo_without_generation = self.echo and self.max_tokens == 0 From 7341c77d693edcecf0a9f5a6e399c5137177dfba Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Mon, 18 Mar 2024 23:05:20 -0700 Subject: [PATCH 138/196] [BugFix] Avoid initializing CUDA too early (#3487) --- vllm/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index f792e89095246..51ae66e2375ab 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -577,12 +577,12 @@ class DeviceConfig: def __init__(self, device: str = "auto") -> None: if device == "auto": # Automated device type detection - if torch.cuda.is_available(): - self.device_type = "cuda" - elif is_neuron(): + if is_neuron(): self.device_type = "neuron" else: - raise RuntimeError("No supported device detected.") + # We don't call torch.cuda.is_available() here to + # avoid initializing CUDA before workers are forked + self.device_type = "cuda" else: # Device type is assigned explicitly self.device_type = device From c614cfee5861e5715a023fa501e432d4acf910fe Mon Sep 17 00:00:00 2001 From: ifsheldon <39153080+ifsheldon@users.noreply.github.com> Date: Wed, 20 Mar 2024 01:54:59 +0800 Subject: [PATCH 139/196] Update dockerfile with ModelScope support (#3429) --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 6a56a33cfe7ac..1f254c76fe5af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -122,7 +122,7 @@ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,ta FROM vllm-base AS vllm-openai # install additional dependencies for openai api server RUN --mount=type=cache,target=/root/.cache/pip \ - pip install accelerate hf_transfer + pip install accelerate hf_transfer modelscope COPY --from=build /workspace/vllm/*.so /workspace/vllm/ COPY vllm vllm From 2a60c9bd174c4eaba790ecb36d13fa4c145d99f4 Mon Sep 17 00:00:00 2001 From: Jim Burtoft <39492751+jimburtoft@users.noreply.github.com> Date: Tue, 19 Mar 2024 16:21:35 -0400 Subject: [PATCH 140/196] [Doc] minor fix to neuron-installation.rst (#3505) --- docs/source/getting_started/neuron-installation.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/getting_started/neuron-installation.rst b/docs/source/getting_started/neuron-installation.rst index 0aff1037d8a29..62bf779c339d5 100644 --- a/docs/source/getting_started/neuron-installation.rst +++ b/docs/source/getting_started/neuron-installation.rst @@ -128,6 +128,7 @@ Once neuronx-cc and transformers-neuronx packages are installed, we will be able .. code-block:: console + $ git clone https://github.com/vllm-project/vllm.git $ cd vllm $ pip install -U -r requirements-neuron.txt $ pip install . 
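The OpenAI-compatible server document above lists extra sampling parameters for the Completions API but only shows a Chat example. A hedged sketch of the same idea against the completions endpoint, reusing the placeholder server address, API key, and model name from that document:

```python
# Sketch: vLLM-specific sampling parameters passed through the OpenAI
# client's extra_body; they are merged into the JSON payload and read by
# the server (see the completion-sampling-params block in protocol.py).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="token-abc123")

completion = client.completions.create(
    model="meta-llama/Llama-2-7b-hf",
    prompt="vLLM is a",
    max_tokens=32,
    extra_body={
        "top_k": 50,              # not part of the OpenAI API
        "min_p": 0.05,
        "repetition_penalty": 1.1,
    },
)
print(completion.choices[0].text)
```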
From cc63d03fbb93f2b984d38e1f5626f523c1f9f1a4 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 19 Mar 2024 13:22:58 -0700 Subject: [PATCH 141/196] Revert "[Core] Cache some utils" (#3507) --- vllm/utils.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vllm/utils.py b/vllm/utils.py index 729a4332af967..d4a8c962c3bfc 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -4,7 +4,6 @@ import subprocess import uuid import gc -from functools import cache from platform import uname from typing import List, Tuple, Union from packaging.version import parse, Version @@ -121,7 +120,6 @@ def is_hip() -> bool: return torch.version.hip is not None -@cache def is_neuron() -> bool: try: import transformers_neuronx @@ -130,7 +128,6 @@ def is_neuron() -> bool: return transformers_neuronx is not None -@cache def get_max_shared_memory_bytes(gpu: int = 0) -> int: """Returns the maximum shared memory per thread block in bytes.""" # NOTE: This import statement should be executed lazily since @@ -154,7 +151,6 @@ def random_uuid() -> str: return str(uuid.uuid4().hex) -@cache def in_wsl() -> bool: # Reference: https://github.com/microsoft/WSL/issues/4071 return "microsoft" in " ".join(uname()).lower() @@ -229,7 +225,6 @@ def set_cuda_visible_devices(device_ids: List[int]) -> None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, device_ids)) -@cache def get_nvcc_cuda_version() -> Optional[Version]: cuda_home = os.environ.get('CUDA_HOME') if not cuda_home: From 63e8b28a990ef1584975c642b1ee5ae8a65b3183 Mon Sep 17 00:00:00 2001 From: Jim Burtoft <39492751+jimburtoft@users.noreply.github.com> Date: Tue, 19 Mar 2024 16:32:30 -0400 Subject: [PATCH 142/196] [Doc] minor fix of spelling in amd-installation.rst (#3506) --- docs/source/getting_started/amd-installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting_started/amd-installation.rst b/docs/source/getting_started/amd-installation.rst index 5d9fdf4056709..3d736bf7120ec 100644 --- a/docs/source/getting_started/amd-installation.rst +++ b/docs/source/getting_started/amd-installation.rst @@ -100,7 +100,7 @@ You can build and install vLLM from source: Build a docker image from `Dockerfile.rocm`, and launch a docker container. -The `Dokerfile.rocm` is designed to support both ROCm 5.7 and ROCm 6.0 and later versions. It provides flexibility to customize the build of docker image using the following arguments: +The `Dockerfile.rocm` is designed to support both ROCm 5.7 and ROCm 6.0 and later versions. It provides flexibility to customize the build of docker image using the following arguments: * `BASE_IMAGE`: specifies the base image used when running ``docker build``, specifically the PyTorch on ROCm base image. We have tested ROCm 5.7 and ROCm 6.0. The default is `rocm/pytorch:rocm6.0_ubuntu20.04_py3.9_pytorch_2.1.1` * `FX_GFX_ARCHS`: specifies the GFX architecture that is used to build flash-attention, for example, `gfx90a;gfx942` for MI200 and MI300. 
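The revert above and the patch that follows swap `functools.cache` for `functools.lru_cache(maxsize=None)`. The two decorators are equivalent unbounded memoizers, but `functools.cache` only exists from Python 3.9, whereas `lru_cache(maxsize=None)` also runs on the Python 3.8 floor that setup.py declares. A standalone sketch of the pattern applied to one of the environment probes:

```python
# Sketch: memoizing a slow import probe so it runs at most once per process.
from functools import lru_cache


@lru_cache(maxsize=None)
def is_neuron() -> bool:
    try:
        import transformers_neuronx
    except ImportError:
        transformers_neuronx = None
    return transformers_neuronx is not None


first = is_neuron()            # performs the import probe
assert is_neuron() is first    # second call is served from the cache
print(is_neuron.cache_info())  # CacheInfo(hits=1, misses=1, ...)
```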
The default is `gfx90a;gfx942` From 20478c4d3abcd0aa8a1d9ace9c76ea3a2e04cb5e Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Tue, 19 Mar 2024 14:34:15 -0700 Subject: [PATCH 143/196] Use lru_cache for some environment detection utils (#3508) --- vllm/utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/utils.py b/vllm/utils.py index d4a8c962c3bfc..7c73062e809f3 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -11,7 +11,7 @@ import psutil import torch import asyncio -from functools import partial +from functools import partial, lru_cache from typing import ( Awaitable, Callable, @@ -120,6 +120,7 @@ def is_hip() -> bool: return torch.version.hip is not None +@lru_cache(maxsize=None) def is_neuron() -> bool: try: import transformers_neuronx @@ -128,6 +129,7 @@ def is_neuron() -> bool: return transformers_neuronx is not None +@lru_cache(maxsize=None) def get_max_shared_memory_bytes(gpu: int = 0) -> int: """Returns the maximum shared memory per thread block in bytes.""" # NOTE: This import statement should be executed lazily since @@ -151,6 +153,7 @@ def random_uuid() -> str: return str(uuid.uuid4().hex) +@lru_cache(maxsize=None) def in_wsl() -> bool: # Reference: https://github.com/microsoft/WSL/issues/4071 return "microsoft" in " ".join(uname()).lower() @@ -225,6 +228,7 @@ def set_cuda_visible_devices(device_ids: List[int]) -> None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, device_ids)) +@lru_cache(maxsize=None) def get_nvcc_cuda_version() -> Optional[Version]: cuda_home = os.environ.get('CUDA_HOME') if not cuda_home: From 9474e89ba4ecae253b585eb6b3e1d85f4e108f01 Mon Sep 17 00:00:00 2001 From: ElizaWszola Date: Wed, 20 Mar 2024 08:11:11 +0100 Subject: [PATCH 144/196] [PREFIX CACHING FOLLOW UP] A bunch of fixes to block allocator performance when automatic prefix caching is disabled (#3357) Co-authored-by: Zhuohan Li --- tests/core/test_block_manager.py | 14 +- tests/prefix_caching/test_prefix_caching.py | 12 +- vllm/core/block_manager.py | 189 +++++++++++++++----- vllm/core/evictor.py | 71 +------- 4 files changed, 165 insertions(+), 121 deletions(-) diff --git a/tests/core/test_block_manager.py b/tests/core/test_block_manager.py index 44ac05a1430b3..9473a33f0ee68 100644 --- a/tests/core/test_block_manager.py +++ b/tests/core/test_block_manager.py @@ -4,7 +4,7 @@ from vllm import SamplingParams from vllm.block import PhysicalTokenBlock -from vllm.core.block_manager import (BlockAllocator, BlockSpaceManager, +from vllm.core.block_manager import (UncachedBlockAllocator, BlockSpaceManager, AllocStatus) from vllm.utils import Device from vllm.sequence import Sequence, SequenceGroup, SequenceStatus, Logprob @@ -15,7 +15,8 @@ def test_block_allocator_allocate(): block_size = 4 num_cpu_blocks = 4 - cpu_allocator = BlockAllocator(Device.CPU, block_size, num_cpu_blocks) + cpu_allocator = UncachedBlockAllocator(Device.CPU, block_size, + num_cpu_blocks) # Allocate all available cpu blocks. 
num_free = num_cpu_blocks @@ -24,7 +25,7 @@ def test_block_allocator_allocate(): block = cpu_allocator.allocate() num_free -= 1 - assert block.block_hash not in cpu_allocator.evictor + assert block not in cpu_allocator.free_blocks assert cpu_allocator.get_num_free_blocks() == num_free with pytest.raises(ValueError): @@ -34,14 +35,15 @@ def test_block_allocator_allocate(): def test_block_allocator_free(): block_size = 4 num_cpu_blocks = 4 - cpu_allocator = BlockAllocator(Device.CPU, block_size, num_cpu_blocks) + cpu_allocator = UncachedBlockAllocator(Device.CPU, block_size, + num_cpu_blocks) # Allocate all available cpu blocks. blocks: List[PhysicalTokenBlock] = [] for _ in range(num_cpu_blocks): block = cpu_allocator.allocate() blocks.append(block) - assert block.block_hash not in cpu_allocator.evictor + assert block not in cpu_allocator.free_blocks # Free all allocated cpu blocks. num_free = 0 @@ -49,7 +51,7 @@ def test_block_allocator_free(): for block in blocks: cpu_allocator.free(block) num_free += 1 - assert block.block_hash in cpu_allocator.evictor + assert block in cpu_allocator.free_blocks assert cpu_allocator.get_num_free_blocks() == num_free with pytest.raises(ValueError): diff --git a/tests/prefix_caching/test_prefix_caching.py b/tests/prefix_caching/test_prefix_caching.py index c83551c36ef10..cb61aac3975a8 100644 --- a/tests/prefix_caching/test_prefix_caching.py +++ b/tests/prefix_caching/test_prefix_caching.py @@ -4,7 +4,7 @@ """ import pytest -from vllm.core.block_manager import BlockAllocator +from vllm.core.block_manager import CachedBlockAllocator from vllm.utils import Device @@ -15,10 +15,7 @@ def test_block_allocator( num_blocks: int, ): block_hash = 1 - block_allocator = BlockAllocator(Device.CPU, - block_size, - num_blocks, - enable_caching=True) + block_allocator = CachedBlockAllocator(Device.CPU, block_size, num_blocks) # Allocate two PysicalTokenBlocks with the same hash and check # that they are the same PhysicalTokenBlock @@ -45,10 +42,7 @@ def test_block_allocator( @pytest.mark.parametrize("num_blocks", [16]) def test_eviction(num_blocks: int, ): block_size = 16 - block_allocator = BlockAllocator(Device.CPU, - block_size, - num_blocks, - enable_caching=True) + block_allocator = CachedBlockAllocator(Device.CPU, block_size, num_blocks) blocks = [] for i in range(num_blocks): diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 8b089a5650f48..ad9b557fd9a83 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -3,6 +3,7 @@ from itertools import count, takewhile from os.path import commonprefix from typing import Dict, List, Optional, Set, Tuple +from abc import ABC, abstractmethod from vllm.block import BlockTable, PhysicalTokenBlock from vllm.sequence import Sequence, SequenceGroup, SequenceStatus @@ -10,7 +11,7 @@ from vllm.core.evictor import Evictor, EvictionPolicy, make_evictor -class BlockAllocator: +class BlockAllocatorBase(ABC): """Manages free physical token blocks for a device. The allocator maintains a list of free blocks and allocates a block when @@ -18,23 +19,57 @@ class BlockAllocator: the reference count becomes zero, the block is added back to the free list. 
""" + @abstractmethod def __init__(self, device: Device, block_size: int, num_blocks: int, - eviction_policy: EvictionPolicy = EvictionPolicy.LRU, - enable_caching: bool = False) -> None: + eviction_policy: EvictionPolicy = EvictionPolicy.LRU): + pass + + @abstractmethod + def allocate(self, + block_hash: Optional[int] = None, + num_hashed_tokens: int = 0) -> PhysicalTokenBlock: + pass + + @abstractmethod + def free(self, block: PhysicalTokenBlock) -> None: + pass + + @abstractmethod + def get_num_free_blocks(self) -> int: + pass + + @abstractmethod + def contains_block(self, block_hash: int) -> bool: + pass + + @abstractmethod + def update_hash(self, block_hash: int, block: PhysicalTokenBlock): + pass + + +class CachedBlockAllocator(BlockAllocatorBase): + """Manages free physical token blocks for a device. + + The allocator maintains a list of free blocks and allocates a block when + requested. When a block is freed, its reference count is decremented. If + the reference count becomes zero, the block is added back to the free list. + """ + + def __init__(self, + device: Device, + block_size: int, + num_blocks: int, + eviction_policy: EvictionPolicy = EvictionPolicy.LRU) -> None: self.device = device self.block_size = block_size self.num_blocks = num_blocks - self.enable_caching = enable_caching self.current_num_blocks = 0 self.cached_blocks: Dict[int, PhysicalTokenBlock] = {} - # Switch over to FIFO eviction when caching is disabled - if not self.enable_caching: - eviction_policy = EvictionPolicy.FIFO self.evictor: Evictor = make_evictor(eviction_policy) self.default_hash_ctr = count() @@ -57,13 +92,6 @@ def allocate_block(self, block_hash: int, def allocate(self, block_hash: Optional[int] = None, num_hashed_tokens: int = 0) -> PhysicalTokenBlock: - # If caching is disabled, just allocate a new block and return it - if not self.enable_caching: - block = self.allocate_block(next(self.default_hash_ctr), - num_hashed_tokens) - block.ref_count += 1 - return block - if block_hash is None: block_hash = next(self.default_hash_ctr) if block_hash in self.evictor: @@ -90,9 +118,8 @@ def free(self, block: PhysicalTokenBlock) -> None: assert block.block_hash not in self.evictor self.evictor.add(block) - # If caching is enabled, remove the block from the cached_blocks - if self.enable_caching: - del self.cached_blocks[block.block_hash] + # Remove the block from the cached_blocks + del self.cached_blocks[block.block_hash] def get_num_free_blocks(self) -> int: return (self.num_blocks - self.current_num_blocks + @@ -102,14 +129,68 @@ def contains_block(self, block_hash: int) -> bool: return block_hash in self.cached_blocks or block_hash in self.evictor def update_hash(self, block_hash: int, block: PhysicalTokenBlock): - # If caching is enabled, update the hash of block and the - # cached_blocks dictionary. - if self.enable_caching: - assert not self.contains_block(block_hash) - old_hash = block.block_hash - block.block_hash = block_hash - del self.cached_blocks[old_hash] - self.cached_blocks[block_hash] = block + # Update the hash of block and the cached_blocks dictionary. + assert not self.contains_block(block_hash) + old_hash = block.block_hash + block.block_hash = block_hash + del self.cached_blocks[old_hash] + self.cached_blocks[block_hash] = block + + +class UncachedBlockAllocator(BlockAllocatorBase): + """Manages free physical token blocks for a device. + + The allocator maintains a list of free blocks and allocates a block when + requested. 
When a block is freed, its reference count is decremented. If + the reference count becomes zero, the block is added back to the free list. + """ + + def __init__( + self, + device: Device, + block_size: int, + num_blocks: int, + ) -> None: + self.device = device + self.block_size = block_size + self.num_blocks = num_blocks + + # Initialize the free blocks. + self.free_blocks: BlockTable = [] + for i in range(num_blocks): + block = PhysicalTokenBlock(device=device, + block_number=i, + block_size=block_size, + block_hash=-1, + num_hashed_tokens=0) + self.free_blocks.append(block) + + def allocate(self, + block_hash: Optional[int] = None, + num_hashed_tokens: int = 0) -> PhysicalTokenBlock: + if not self.free_blocks: + raise ValueError("Out of memory! No free blocks are available.") + block = self.free_blocks.pop() + block.ref_count = 1 + return block + + def free(self, block: PhysicalTokenBlock) -> None: + if block.ref_count == 0: + raise ValueError(f"Double free! {block} is already freed.") + block.ref_count -= 1 + if block.ref_count == 0: + self.free_blocks.append(block) + + def get_num_free_blocks(self) -> int: + return len(self.free_blocks) + + def contains_block(self, block_hash: int) -> bool: + raise NotImplementedError( + "Invalid codepath for uncached block allocator.") + + def update_hash(self, block_hash: int, block: PhysicalTokenBlock): + raise NotImplementedError( + "Invalid codepath for uncached block allocator.") class AllocStatus(enum.Enum): @@ -142,6 +223,10 @@ def __init__( self.num_total_gpu_blocks = num_gpu_blocks self.num_total_cpu_blocks = num_cpu_blocks + if enable_caching and sliding_window is not None: + raise NotImplementedError( + "Sliding window is not allowed with prefix caching enabled!") + self.block_sliding_window = None if sliding_window is not None: assert sliding_window % block_size == 0, (sliding_window, @@ -154,14 +239,17 @@ def __init__( self.enable_caching = enable_caching self.watermark_blocks = int(watermark * num_gpu_blocks) - self.gpu_allocator = BlockAllocator(Device.GPU, - block_size, - num_gpu_blocks, - enable_caching=enable_caching) - self.cpu_allocator = BlockAllocator(Device.CPU, - block_size, - num_cpu_blocks, - enable_caching=enable_caching) + + if self.enable_caching: + self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size, + num_gpu_blocks) + self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size, + num_cpu_blocks) + else: + self.gpu_allocator = UncachedBlockAllocator( + Device.GPU, block_size, num_gpu_blocks) + self.cpu_allocator = UncachedBlockAllocator( + Device.CPU, block_size, num_cpu_blocks) # Mapping: seq_id -> BlockTable. self.block_tables: Dict[int, BlockTable] = {} @@ -198,10 +286,16 @@ def allocate(self, seq_group: SequenceGroup) -> None: if (self.block_sliding_window is not None and logical_idx >= self.block_sliding_window): block = block_table[logical_idx % self.block_sliding_window] - else: + # Set the reference counts of the token blocks. + block.ref_count = seq_group.num_seqs() + elif self.enable_caching: block = self.gpu_allocator.allocate( seq.hash_of_block(logical_idx), seq.num_hashed_tokens_of_block(logical_idx)) + else: + block = self.gpu_allocator.allocate() + # Set the reference counts of the token blocks. + block.ref_count = seq_group.num_seqs() block_table.append(block) # Assign the block table for each sequence. 
@@ -220,8 +314,10 @@ def _promote_last_block( seq: Sequence, last_block: PhysicalTokenBlock, ) -> PhysicalTokenBlock: - # Compute a new hash for the block so that it can be shared by - # other Sequences + assert self.enable_caching + + # Compute a new hash for the block so that it can be shared by other + # Sequences new_hash = seq.hash_of_block(len(seq.logical_token_blocks) - 1) # if new_hash is already in the cached table, then free last_block @@ -254,6 +350,8 @@ def _allocate_last_physical_block( self, seq: Sequence, ) -> PhysicalTokenBlock: + if not self.enable_caching: + return self.gpu_allocator.allocate() block_hash: Optional[int] = None if (self._is_last_block_full(seq)): block_hash = seq.hash_of_block(len(seq.logical_token_blocks) - 1) @@ -293,10 +391,12 @@ def append_slot( assert last_block.device == Device.GPU if last_block.ref_count == 1: # Not shared with other sequences. Appendable. - # If the last block is now complete, promote it to a full block so - # that it can be shared - new_block = self._maybe_promote_last_block(seq, last_block) - block_table[-1] = new_block + if self.enable_caching: + # If the last block is now complete, we may reuse an old block + # to save memory. + maybe_new_block = self._maybe_promote_last_block( + seq, last_block) + block_table[-1] = maybe_new_block return None else: # The last block is shared with other sequences. @@ -440,9 +540,12 @@ def access_all_blocks_in_seq( seq: Sequence, access_time: float, ) -> None: - block_table = self.block_tables[seq.seq_id] - for block in block_table: - block.last_accessed = access_time + if self.enable_caching: + # Update the last accessed time of all the blocks accessed + # in this step. + block_table = self.block_tables[seq.seq_id] + for block in block_table: + block.last_accessed = access_time def compute_full_blocks_in_seq(self, seq: Sequence): if seq.seq_id not in self.block_tables: diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py index 1d81f5a97d71c..9f401cba3fbea 100644 --- a/vllm/core/evictor.py +++ b/vllm/core/evictor.py @@ -1,5 +1,5 @@ import enum -from typing import Dict, List, Optional +from typing import Dict from abc import ABC, abstractmethod, abstractproperty from vllm.block import PhysicalTokenBlock @@ -10,7 +10,6 @@ class EvictionPolicy(enum.Enum): Evictor subclass. """ LRU = enum.auto() - FIFO = enum.auto() class Evictor(ABC): @@ -66,37 +65,18 @@ def __contains__(self, block_hash: int) -> bool: # TODO: The performance of this evict function can be optimized further. 
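    # The rewritten evict() below folds the old multi-pass scan into a single
    # pass that keeps a running best candidate: the oldest last_accessed wins,
    # and ties go to the block with more hashed tokens (the longer cached
    # prefix).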
def evict(self) -> PhysicalTokenBlock: - free_blocks: List[PhysicalTokenBlock] = list(self.free_table.values()) - if len(free_blocks) == 0: + if len(self.free_table) == 0: raise ValueError("No usable cache memory left") + free_blocks = self.free_table.values() - # Find lowest timestamp - lowest_timestamp = free_blocks[0].last_accessed - for block in free_blocks: - if block.last_accessed < lowest_timestamp: - lowest_timestamp = block.last_accessed + # Get evicted block + evicted_block: PhysicalTokenBlock = next(iter(free_blocks)) - # Find all blocks with the lowest timestamp - least_recent: List[PhysicalTokenBlock] = [] for block in free_blocks: - if block.last_accessed == lowest_timestamp: - least_recent.append(block) - - # Find highest prefix count per block - highest_num_hashed_tokens = 0 - for block in least_recent: - if block.num_hashed_tokens > highest_num_hashed_tokens: - highest_num_hashed_tokens = block.num_hashed_tokens - - evicted_block: Optional[PhysicalTokenBlock] = None - - # Find the first block with the lowest timestamp - for block in least_recent: - if block.num_hashed_tokens == highest_num_hashed_tokens: + if (block.last_accessed < evicted_block.last_accessed + or block.last_accessed == evicted_block.last_accessed and + block.num_hashed_tokens > evicted_block.num_hashed_tokens): evicted_block = block - break - - assert evicted_block is not None del self.free_table[evicted_block.block_hash] @@ -119,43 +99,8 @@ def num_blocks(self) -> int: return len(self.free_table) -class RandomEvictor(Evictor): - """Evicts in a first-in-first-out order""" - - def __init__(self): - self.free_table: Dict[int, PhysicalTokenBlock] = {} - - def __contains__(self, block_hash: int) -> bool: - return block_hash in self.free_table - - def evict(self) -> PhysicalTokenBlock: - if len(self.free_table) == 0: - raise ValueError("No usable cache memory left") - evicted_block = next(iter(self.free_table.values())) - evicted_block.computed = False - del self.free_table[evicted_block.block_hash] - return evicted_block - - def add(self, block: PhysicalTokenBlock): - self.free_table[block.block_hash] = block - - def remove(self, block_hash: int) -> PhysicalTokenBlock: - if block_hash not in self.free_table: - raise ValueError( - "Attempting to remove block that's not in the evictor") - block: PhysicalTokenBlock = self.free_table[block_hash] - del self.free_table[block_hash] - return block - - @property - def num_blocks(self) -> int: - return len(self.free_table) - - def make_evictor(eviction_policy: EvictionPolicy) -> Evictor: if eviction_policy == EvictionPolicy.LRU: return LRUEvictor() - elif eviction_policy == EvictionPolicy.FIFO: - return RandomEvictor() else: raise ValueError(f"Unknown cache eviction policy: {eviction_policy}") From 4ad521d8b51145a55c1be6b8e451f76423cc2d87 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Wed, 20 Mar 2024 00:36:09 -0700 Subject: [PATCH 145/196] [Core] Add generic typing to `LRUCache` (#3511) --- vllm/lora/models.py | 6 +++--- .../tokenizer_group/base_tokenizer_group.py | 19 ++++++++++++------ .../tokenizer_group/tokenizer_group.py | 6 ++---- vllm/utils.py | 20 ++++++++++--------- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 238da256b7cdc..6fe07b69b3203 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -4,7 +4,7 @@ import math import os import re -from typing import (Any, Callable, Dict, Hashable, List, Optional, Tuple, Type) +from typing import (Callable, Dict, Hashable, List, Optional, Tuple, 
Type) import safetensors.torch import torch @@ -535,14 +535,14 @@ def _create_merged_loras_inplace(self, lora_model: LoRAModel) -> None: replacement_loras) -class LoRALRUCache(LRUCache): +class LoRALRUCache(LRUCache[LoRAModel]): def __init__(self, capacity: int, deactivate_lora_fn: Callable[[Hashable], None]): super().__init__(capacity) self.deactivate_lora_fn = deactivate_lora_fn - def _on_remove(self, key: Hashable, value: Any): + def _on_remove(self, key: Hashable, value: LoRAModel): logger.debug(f"Removing LoRA. int id: {key}") self.deactivate_lora_fn(key) return super()._on_remove(key, value) diff --git a/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py index 99518a606fabe..3cce96e06d1a0 100644 --- a/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py +++ b/vllm/transformers_utils/tokenizer_group/base_tokenizer_group.py @@ -22,27 +22,34 @@ def get_max_input_len(self, pass @abstractmethod - def encode(self, prompt: str, request_id: Optional[str], - lora_request: Optional[LoRARequest]) -> List[int]: + def encode(self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: """Encode a prompt using the tokenizer group.""" pass @abstractmethod - async def encode_async(self, prompt: str, request_id: Optional[str], - lora_request: Optional[LoRARequest]) -> List[int]: + async def encode_async( + self, + prompt: str, + request_id: Optional[str] = None, + lora_request: Optional[LoRARequest] = None) -> List[int]: """Encode a prompt using the tokenizer group.""" pass @abstractmethod def get_lora_tokenizer( self, - lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": """Get a tokenizer for a LoRA request.""" pass @abstractmethod async def get_lora_tokenizer_async( self, - lora_request: Optional[LoRARequest]) -> "PreTrainedTokenizer": + lora_request: Optional[LoRARequest] = None + ) -> "PreTrainedTokenizer": """Get a tokenizer for a LoRA request.""" pass diff --git a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py index 3af1334cb5ede..ec20d0fb713a4 100644 --- a/vllm/transformers_utils/tokenizer_group/tokenizer_group.py +++ b/vllm/transformers_utils/tokenizer_group/tokenizer_group.py @@ -21,10 +21,8 @@ def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, self.enable_lora = enable_lora self.max_input_length = max_input_length self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config) - if enable_lora: - self.lora_tokenizers = LRUCache(capacity=max_num_seqs) - else: - self.lora_tokenizers = None + self.lora_tokenizers = LRUCache[PreTrainedTokenizer]( + capacity=max_num_seqs) if enable_lora else None def ping(self) -> bool: """Check if the tokenizer group is alive.""" diff --git a/vllm/utils.py b/vllm/utils.py index 7c73062e809f3..8fa372b5f7f09 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -5,7 +5,7 @@ import uuid import gc from platform import uname -from typing import List, Tuple, Union +from typing import List, Tuple, Union, Generic from packaging.version import parse, Version import psutil @@ -53,10 +53,10 @@ def reset(self) -> None: self.counter = 0 -class LRUCache: +class LRUCache(Generic[T]): def __init__(self, capacity: int): - self.cache = OrderedDict() + self.cache = OrderedDict[Hashable, T]() self.capacity = capacity def __contains__(self, key: Hashable) 
-> bool: @@ -65,10 +65,10 @@ def __contains__(self, key: Hashable) -> bool: def __len__(self) -> int: return len(self.cache) - def __getitem__(self, key: Hashable) -> Any: + def __getitem__(self, key: Hashable) -> T: return self.get(key) - def __setitem__(self, key: Hashable, value: Any) -> None: + def __setitem__(self, key: Hashable, value: T) -> None: self.put(key, value) def __delitem__(self, key: Hashable) -> None: @@ -77,7 +77,9 @@ def __delitem__(self, key: Hashable) -> None: def touch(self, key: Hashable) -> None: self.cache.move_to_end(key) - def get(self, key: Hashable, default_value: Optional[Any] = None) -> int: + def get(self, + key: Hashable, + default_value: Optional[T] = None) -> Optional[T]: if key in self.cache: value = self.cache[key] self.cache.move_to_end(key) @@ -85,12 +87,12 @@ def get(self, key: Hashable, default_value: Optional[Any] = None) -> int: value = default_value return value - def put(self, key: Hashable, value: Any) -> None: + def put(self, key: Hashable, value: T) -> None: self.cache[key] = value self.cache.move_to_end(key) self._remove_old_if_needed() - def _on_remove(self, key: Hashable, value: Any): + def _on_remove(self, key: Hashable, value: T): pass def remove_oldest(self): @@ -103,7 +105,7 @@ def _remove_old_if_needed(self) -> None: while len(self.cache) > self.capacity: self.remove_oldest() - def pop(self, key: int, default_value: Optional[Any] = None) -> Any: + def pop(self, key: Hashable, default_value: Optional[Any] = None) -> T: run_on_remove = key in self.cache value = self.cache.pop(key, default_value) if run_on_remove: From 5ee14494e4c78769fa10af8b58c3e7808053da0d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 20 Mar 2024 00:38:53 -0700 Subject: [PATCH 146/196] [Misc] Remove cache stream and cache events (#3461) --- tests/worker/test_swap.py | 77 +++++++++++++++++++++++++++++++++++++ vllm/worker/cache_engine.py | 26 ++++--------- vllm/worker/worker.py | 15 +------- 3 files changed, 86 insertions(+), 32 deletions(-) create mode 100644 tests/worker/test_swap.py diff --git a/tests/worker/test_swap.py b/tests/worker/test_swap.py new file mode 100644 index 0000000000000..35630a06a900f --- /dev/null +++ b/tests/worker/test_swap.py @@ -0,0 +1,77 @@ +import torch + +from vllm.engine.arg_utils import EngineArgs +from vllm.worker.worker import Worker +from vllm.utils import get_distributed_init_method, get_ip, get_open_port + + +def test_swap() -> None: + # Configure the engine. + engine_args = EngineArgs(model="facebook/opt-125m", + dtype="half", + load_format="dummy") + (model_config, cache_config, parallel_config, scheduler_config, + device_config, _) = engine_args.create_engine_configs() + cache_config.num_gpu_blocks = 100 + cache_config.num_cpu_blocks = 100 + + # Create the worker. + distributed_init_method = get_distributed_init_method( + get_ip(), get_open_port()) + worker = Worker( + model_config=model_config, + parallel_config=parallel_config, + scheduler_config=scheduler_config, + device_config=device_config, + local_rank=0, + rank=0, + distributed_init_method=distributed_init_method, + is_driver_worker=True, + ) + + # Initialize the worker. + worker.init_model() + worker.load_model() + worker.init_cache_engine(cache_config) + worker.warm_up_model() + + # Randomly initialize the cache. 
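    # Distinct random contents on both devices let the assertions below
    # confirm that each block really was copied in the requested direction.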
+ gpu_cache = worker.cache_engine.gpu_cache + cpu_cache = worker.cache_engine.cpu_cache + num_layers = len(gpu_cache) + for i in range(num_layers): + gpu_key_cache, gpu_value_cache = gpu_cache[i] + gpu_key_cache.random_() + gpu_value_cache.random_() + cpu_key_cache, cpu_value_cache = cpu_cache[i] + cpu_key_cache.random_() + cpu_value_cache.random_() + + allclose = lambda a, b: torch.allclose( + a.cuda(), b.cuda(), rtol=0.0, atol=0.0) + + # Test swap out. + blocks_to_swap_out = {3: 72, 56: 35, 84: 34} + worker.execute_model(seq_group_metadata_list=[], + blocks_to_swap_in={}, + blocks_to_swap_out=blocks_to_swap_out, + blocks_to_copy={}) + for i in range(num_layers): + gpu_key_cache, gpu_value_cache = gpu_cache[i] + cpu_key_cache, cpu_value_cache = cpu_cache[i] + for src, dst in blocks_to_swap_out.items(): + assert allclose(gpu_key_cache[src], cpu_key_cache[dst]) + assert allclose(gpu_value_cache[src], cpu_value_cache[dst]) + + # Test swap in. + blocks_to_swap_in = {19: 45, 67: 23, 12: 78, 40: 99, 1: 71} + worker.execute_model(seq_group_metadata_list=[], + blocks_to_swap_in=blocks_to_swap_in, + blocks_to_swap_out={}, + blocks_to_copy={}) + for i in range(num_layers): + gpu_key_cache, gpu_value_cache = gpu_cache[i] + cpu_key_cache, cpu_value_cache = cpu_cache[i] + for src, dst in blocks_to_swap_in.items(): + assert allclose(gpu_key_cache[dst], cpu_key_cache[src]) + assert allclose(gpu_value_cache[dst], cpu_value_cache[src]) diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index 880299783935c..1782fe7e57177 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -38,7 +38,7 @@ def __init__( self.num_gpu_blocks = cache_config.num_gpu_blocks self.num_cpu_blocks = cache_config.num_cpu_blocks - # Skip initializing CUDA stream and buffer for Neuron backend. + # Skip initializing KV cache for Neuron backend. if is_neuron(): return @@ -51,12 +51,6 @@ def __init__( self.gpu_cache = self.allocate_gpu_cache() self.cpu_cache = self.allocate_cpu_cache() - # Initialize the stream for caching operations. - self.cache_stream = torch.cuda.Stream() - assert self.cache_stream != torch.cuda.current_stream() - # Initialize the events for stream synchronization. - self.events = [torch.cuda.Event() for _ in range(self.num_layers)] - def get_key_block_shape(self) -> Tuple[int, int, int, int]: element_size = torch.tensor([], dtype=self.dtype).element_size() x = 16 // element_size @@ -126,17 +120,13 @@ def _swap( ) -> None: from vllm._C import cache_ops - with torch.cuda.stream(self.cache_stream): - for i in range(self.num_layers): - src_key_cache, src_value_cache = src[i] - dst_key_cache, dst_value_cache = dst[i] - # Copy the key blocks. - cache_ops.swap_blocks(src_key_cache, dst_key_cache, src_to_dst) - # Copy the value blocks. - cache_ops.swap_blocks(src_value_cache, dst_value_cache, - src_to_dst) - event = self.events[i] - event.record(stream=self.cache_stream) + for i in range(self.num_layers): + src_key_cache, src_value_cache = src[i] + dst_key_cache, dst_value_cache = dst[i] + # Copy the key blocks. + cache_ops.swap_blocks(src_key_cache, dst_key_cache, src_to_dst) + # Copy the value blocks. 
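            # Like the key-cache copy above, this now runs on the current
            # stream; the dedicated cache stream and per-layer events are
            # removed by this patch.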
+ cache_ops.swap_blocks(src_value_cache, dst_value_cache, src_to_dst) def swap_in(self, src_to_dst: Dict[int, int]) -> None: self._swap(self.cpu_cache, self.gpu_cache, src_to_dst) diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 0dcd4018afa5f..81beb5ce4d8d4 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -65,7 +65,6 @@ def __init__( # self.init_cache_engine(). self.cache_config = None self.cache_engine = None - self.cache_events = None self.gpu_cache = None def init_model(self, cupy_port: Optional[int] = None) -> None: @@ -148,7 +147,6 @@ def init_cache_engine(self, cache_config: CacheConfig) -> None: self.cache_config = cache_config self.cache_engine = CacheEngine(self.cache_config, self.model_config, self.parallel_config) - self.cache_events = self.cache_engine.events self.gpu_cache = self.cache_engine.gpu_cache self.model_runner.set_block_size(self.cache_engine.block_size) @@ -166,24 +164,13 @@ def cache_swap( blocks_to_copy: Dict[int, List[int]], ) -> None: # Issue cache operations. - issued_cache_op = False + # TODO(woosuk): Profile swapping overhead and optimize if needed. if blocks_to_swap_in: self.cache_engine.swap_in(blocks_to_swap_in) - issued_cache_op = True if blocks_to_swap_out: self.cache_engine.swap_out(blocks_to_swap_out) - issued_cache_op = True if blocks_to_copy: self.cache_engine.copy(blocks_to_copy) - issued_cache_op = True - - cache_events = self.cache_events if issued_cache_op else None - - # Wait for cache operations to finish. - # TODO(woosuk): Profile swapping overhead and optimize if needed. - if cache_events is not None: - for event in cache_events: - event.wait() @torch.inference_mode() def execute_model( From 84eaa68425807a490f363d2e5ddf9bee3d362b0d Mon Sep 17 00:00:00 2001 From: "Allen.Dou" Date: Thu, 21 Mar 2024 00:28:29 +0800 Subject: [PATCH 147/196] Abort when nvcc command is not found in the PATH (#3527) --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 29a531d44a9d5..150fcebeb8878 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,12 @@ endif() # append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") +# Ensure the 'nvcc' command is in the PATH +find_program(NVCC_EXECUTABLE nvcc) +if (NOT NVCC_EXECUTABLE) + message(FATAL_ERROR "nvcc not found") +endif() + # # Import torch cmake configuration. 
# Torch also imports CUDA (and partially HIP) languages with some customizations, From ba8ae1d84f66dd804a97182350fee6ffcadf0faf Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Wed, 20 Mar 2024 13:06:56 -0400 Subject: [PATCH 148/196] Check for _is_cuda() in compute_num_jobs (#3481) --- setup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/setup.py b/setup.py index 88787334be21a..67575a0e04bf0 100644 --- a/setup.py +++ b/setup.py @@ -61,12 +61,12 @@ def compute_num_jobs(self): except AttributeError: num_jobs = os.cpu_count() - nvcc_cuda_version = get_nvcc_cuda_version() - if nvcc_cuda_version >= Version("11.2"): - nvcc_threads = int(os.getenv("NVCC_THREADS", 8)) - num_jobs = max(1, round(num_jobs / (nvcc_threads / 4))) - else: - nvcc_threads = None + nvcc_threads = None + if _is_cuda(): + nvcc_cuda_version = get_nvcc_cuda_version() + if nvcc_cuda_version >= Version("11.2"): + nvcc_threads = int(os.getenv("NVCC_THREADS", 8)) + num_jobs = max(1, round(num_jobs / (nvcc_threads / 4))) return num_jobs, nvcc_threads From 80e254834de9c3c34eaca02d8880e952b3daf344 Mon Sep 17 00:00:00 2001 From: James Whedbee Date: Wed, 20 Mar 2024 16:05:03 -0500 Subject: [PATCH 149/196] [Bugfix] Fix ROCm support in CMakeLists.txt (#3534) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 150fcebeb8878..66842e6845edd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,7 +51,7 @@ append_cmake_prefix_path("torch" "torch.utils.cmake_prefix_path") # Ensure the 'nvcc' command is in the PATH find_program(NVCC_EXECUTABLE nvcc) -if (NOT NVCC_EXECUTABLE) +if (CUDA_FOUND AND NOT NVCC_EXECUTABLE) message(FATAL_ERROR "nvcc not found") endif() From 426ec4ec6711b4180538cd56b9f6b856e5276a1f Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Wed, 20 Mar 2024 14:45:08 -0700 Subject: [PATCH 150/196] [1/n] Triton sampling kernel (#3186) Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com> --- tests/kernels/test_rand.py | 51 +++ tests/kernels/test_sampler.py | 196 ++++++++++ tests/samplers/test_sampler.py | 6 +- vllm/model_executor/layers/ops/__init__.py | 0 vllm/model_executor/layers/ops/rand.py | 157 ++++++++ vllm/model_executor/layers/ops/sample.py | 405 +++++++++++++++++++++ vllm/model_executor/layers/sampler.py | 109 +++++- vllm/model_executor/sampling_metadata.py | 129 ++++++- vllm/sequence.py | 3 + vllm/worker/model_runner.py | 40 +- 10 files changed, 1072 insertions(+), 24 deletions(-) create mode 100644 tests/kernels/test_rand.py create mode 100644 tests/kernels/test_sampler.py create mode 100644 vllm/model_executor/layers/ops/__init__.py create mode 100644 vllm/model_executor/layers/ops/rand.py create mode 100644 vllm/model_executor/layers/ops/sample.py diff --git a/tests/kernels/test_rand.py b/tests/kernels/test_rand.py new file mode 100644 index 0000000000000..3b9d0d732acf5 --- /dev/null +++ b/tests/kernels/test_rand.py @@ -0,0 +1,51 @@ +import torch +import pytest +import random + +from vllm.model_executor.layers.ops.rand import seeded_uniform +from vllm.model_executor.utils import set_random_seed + + +@pytest.mark.parametrize("dtype", + [torch.float32, torch.float16, torch.bfloat16]) +@pytest.mark.parametrize("use_3d", [True, False]) +def test_seeded_uniform(dtype: torch.dtype, use_3d: bool): + device = "cuda" + for seed in range(512): + set_random_seed(seed) + rows = random.randint(1, 512) + cols = random.randint(1, 64000) + if use_3d: + third_dim = 
random.randint(2, 10) + dims = [rows, third_dim, cols] + else: + dims = [rows, cols] + seeds = torch.randint(torch.iinfo(torch.long).min, + torch.iinfo(torch.long).max, (rows, ), + device=device) + + # Test that the same seed produces the same output + out = seeded_uniform(*dims, seeds=seeds, dtype=dtype, device=device) + out2 = seeded_uniform(*dims, seeds=seeds, dtype=dtype, device=device) + torch.testing.assert_close(out, out2) + # del to save memory + del out2 + + out3 = seeded_uniform(*dims, seeds=seeds, dtype=dtype, device=device) + torch.testing.assert_close(out, out3) + # del to save memory + del out3 + + # Initialize out tensor with garbage to ensure that it is overwritten + out_with_tensor = seeded_uniform( + *dims, + out=torch.full( + (*dims, ), + -1, + dtype=dtype, + device=device, + ), + seeds=seeds, + dtype=dtype, + ) + torch.testing.assert_close(out, out_with_tensor) diff --git a/tests/kernels/test_sampler.py b/tests/kernels/test_sampler.py new file mode 100644 index 0000000000000..5f8c51fb074f4 --- /dev/null +++ b/tests/kernels/test_sampler.py @@ -0,0 +1,196 @@ +import gc + +import torch +import pytest +import triton +import triton.language as tl + +from vllm.model_executor.layers.ops.sample import ( + _uniform_to_exponential, sample, get_num_triton_sampler_splits, + MAX_TRITON_N_COLS) +from vllm.model_executor.utils import set_random_seed +from vllm.model_executor.sampling_metadata import SamplingTensors + +SINGLE_SPLIT_VOCAB_SIZE = 32000 # llama/mistral/mixtral vocab size +MULTI_SPLIT_VOCAB_SIZE = MAX_TRITON_N_COLS + 100 + + +@pytest.fixture(autouse=True) +def _cleanup(): + yield + gc.collect() + torch.cuda.empty_cache() + + +@triton.jit +def _uniform_to_exponential_kernel(input, output, n: tl.constexpr): + idx = tl.arange(0, n) + x = tl.load(input + idx) + y = _uniform_to_exponential(x) + tl.store(output + idx, y) + + +def test_uniform_to_exponential(): + """Test that we can convert uniform to exponential without div by 0.""" + input = torch.tensor([0.0, 1.0 - torch.finfo(torch.float32).eps], + dtype=torch.float32, + device="cuda") + output = torch.zeros(input.shape, dtype=torch.float32, device="cuda") + _uniform_to_exponential_kernel[(1, )](input, output, 2) + assert torch.all(torch.isfinite(output)) + assert torch.all(output > 0) + assert torch.all(torch.isfinite(torch.full_like(output, 1.0) / output)) + + +@pytest.mark.parametrize("random_sampling", [True, False, "mixed"]) +@pytest.mark.parametrize("max_best_of", [1, 2, 3, 4, 5]) +@pytest.mark.parametrize("modify_greedy_probs", [True, False]) +@pytest.mark.parametrize("seed", [1337]) +@pytest.mark.parametrize("vocab_size", + [SINGLE_SPLIT_VOCAB_SIZE, MULTI_SPLIT_VOCAB_SIZE]) +@pytest.mark.parametrize("save_logprobs", [True, False]) +def test_sample_decoding_only(random_sampling, max_best_of, + modify_greedy_probs, seed, vocab_size, + save_logprobs): + set_random_seed(seed) + bs = 8 + probs = torch.zeros((bs, vocab_size), dtype=torch.float32, device="cuda") + for i in range(bs): + probs[i, i * (vocab_size // bs)] = 1.0 + logprobs = torch.rand_like(probs) + sample_indices = torch.arange(bs, dtype=torch.long, device="cuda") + n_splits = get_num_triton_sampler_splits(probs.shape[1]) + if random_sampling == "mixed": + random_sampling_mask = (torch.rand( + (1, bs), device="cuda") < 0.5).expand(n_splits, bs) + elif random_sampling: + random_sampling_mask = torch.ones((n_splits, bs), + dtype=torch.bool, + device="cuda") + else: + random_sampling_mask = torch.zeros((n_splits, bs), + dtype=torch.bool, + device="cuda") + + seeds 
= torch.randint(1, + torch.iinfo(torch.long).max, (n_splits, bs), + device="cuda").mul_(random_sampling_mask) + sampled_tokens, sampled_logprobs, sampled_modified_probs = sample( + probs=probs, + logprobs=logprobs, + sample_indices=sample_indices, + seeds=seeds, + max_best_of=max_best_of, + modify_greedy_probs=modify_greedy_probs, + save_logprobs=save_logprobs, + _save_modified_probs=True) + assert sampled_tokens.shape == (bs, max_best_of) + for i in range(bs): + assert torch.all(sampled_tokens[i] == i * (vocab_size // bs)) + request_uses_random_sampling = random_sampling_mask[0, i] + if modify_greedy_probs and not request_uses_random_sampling: + # If we are modifying greedy probs and the request is greedy, + # we want to make sure the probs tensor is modified in place + assert torch.allclose( + probs[i][sampled_tokens[i]], + torch.full_like(probs[i][sampled_tokens[i]], 1.0)) + assert torch.sum(probs[i]) == 1.0 + assert torch.allclose( + sampled_modified_probs[i][0], + torch.full_like(sampled_modified_probs[i][0], 1.0)) + elif request_uses_random_sampling: + # If the request is random, we want to make sure + # sampled_modified_probs tensor has noise added + # (and thus is different from probs tensor) + assert not torch.allclose(sampled_modified_probs[i][0], + probs[i][sampled_tokens[i]]) + elif not request_uses_random_sampling: + # If the request is greedy and we are not modifying greedy probs, + # we want to make sure sampled_modified_probs tensor is the same as + # the probs tensor. + assert torch.allclose(sampled_modified_probs[i][0], + probs[i][sampled_tokens[i]]) + + if save_logprobs: + assert sampled_logprobs.shape == (bs, max_best_of) + for i in range(bs): + for best_of in range(max_best_of): + assert torch.all(sampled_logprobs[i] == logprobs[i][ + sampled_tokens[i, best_of]]) + else: + assert sampled_logprobs is None + + +@pytest.mark.parametrize("random_sampling", [True, False, "mixed"]) +@pytest.mark.parametrize("max_best_of", [1, 2, 3, 4, 5]) +@pytest.mark.parametrize("modify_greedy_probs", [True, False]) +@pytest.mark.parametrize("seed", [1337]) +@pytest.mark.parametrize("vocab_size", + [SINGLE_SPLIT_VOCAB_SIZE, MULTI_SPLIT_VOCAB_SIZE]) +def test_sample_prompt_logprobs(random_sampling, max_best_of, + modify_greedy_probs, seed, vocab_size): + set_random_seed(seed) + prompt_sizes = [16, 32, 64, 128] * 2 + samples = 8 + bs = samples + sum(prompt_sizes) + probs = torch.zeros((bs, vocab_size), dtype=torch.float32, device="cuda") + for i in range(bs): + probs[i, i * (vocab_size // bs)] = 1.0 + logprobs = torch.rand_like(probs) + sample_indices = torch.tensor(prompt_sizes, + dtype=torch.long, + device="cuda").cumsum_(0) + n_splits = get_num_triton_sampler_splits(probs.shape[1]) + if random_sampling == "mixed": + random_sampling_mask = torch.rand( + (n_splits, samples), device="cuda") < 0.5 + elif random_sampling: + random_sampling_mask = torch.ones((n_splits, samples), + dtype=torch.bool, + device="cuda") + else: + random_sampling_mask = torch.zeros((n_splits, samples), + dtype=torch.bool, + device="cuda") + + seeds = torch.randint(1, + torch.iinfo(torch.long).max, (n_splits, samples), + device="cuda").mul_(random_sampling_mask) + sampled_tokens, sampled_logprobs, _ = sample( + probs=probs, + logprobs=logprobs, + sample_indices=sample_indices, + seeds=seeds, + max_best_of=max_best_of, + modify_greedy_probs=modify_greedy_probs, + save_logprobs=True) + assert sampled_tokens.shape == (samples, max_best_of) + assert sampled_logprobs.shape == (samples, max_best_of) + for i, t in 
enumerate(sample_indices): + assert torch.all(sampled_tokens[i] == t * (vocab_size // bs)) + for best_of in range(max_best_of): + assert torch.all(sampled_logprobs[i] == logprobs[sample_indices[i]] + [sampled_tokens[i, best_of]]) + + +@pytest.mark.parametrize("seed", list(range(16))) +def test_get_sequence_seeds(seed): + """Ensure that we get a different child seed from base + seed + extra entropy""" + starting_seed = seed + seq_seed = None + extra_entropy = 1 + for i in range(512): + new_seq_seed = SamplingTensors._get_sequence_seeds(starting_seed, + i, + seeds_to_generate=1, + is_greedy=False)[0] + new_seq_seed_extra_entropy = SamplingTensors._get_sequence_seeds( + starting_seed, + i, + extra_entropy, + seeds_to_generate=1, + is_greedy=False)[0] + assert new_seq_seed_extra_entropy != new_seq_seed + assert seq_seed != new_seq_seed + seq_seed = new_seq_seed diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index 1bc8703d1a8e0..b0c6e1c09eebc 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -302,11 +302,11 @@ def test_sampler_logits_processors(seed: int, device: str): batch_size = random.randint(1, 256) input_tensor, _, sampler, model_runner = _prepare_test(batch_size) - # This sample logits processor gives infinite score to the i-th token, + # This sample logits processor gives maximum score to the i-th token, # where i is the length of the input sequence. # We therefore expect the output token sequence to be [0, 1, 2, ...] def pick_ith(token_ids, logits): - logits[len(token_ids)] = float("inf") + logits[len(token_ids)] = torch.finfo(logits.dtype).max return logits seq_group_metadata_list = [] @@ -385,7 +385,7 @@ def test_sampler_top_k_top_p(seed: int, device: str): sample_probs = None - def mock_sample(probs, logprobs, sampling_metadata): + def mock_sample(probs, *args, **kwargs): nonlocal sample_probs sample_probs = probs return [[prob.topk(1, dim=-1).indices.tolist(), [0]] for prob in probs] diff --git a/vllm/model_executor/layers/ops/__init__.py b/vllm/model_executor/layers/ops/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/vllm/model_executor/layers/ops/rand.py b/vllm/model_executor/layers/ops/rand.py new file mode 100644 index 0000000000000..5b4b7a153351f --- /dev/null +++ b/vllm/model_executor/layers/ops/rand.py @@ -0,0 +1,157 @@ +import torch +import triton +import triton.language as tl + +from typing import Optional, Union + + +def seeded_uniform( + *size, + seeds: torch.Tensor, + out: Optional[torch.Tensor] = None, + dtype: Optional[torch.dtype] = None, + device: Optional[Union[torch.device, str]] = None, + pin_memory: Optional[bool] = False, +) -> torch.Tensor: + """Similar to torch.rand, but allows for seeds to be set per row. + + seeds must be a 1d tensor. The output tensor may be 1d, 2d, or 3d. + If it is 3d, the additional seeds needed will be derived automatically + in a deterministic fashion: + [ + row 0: [columns_with_seed_0], [columns_with_seed0^1], ... 
+ ] + """ + n_dims = len(size) + + if n_dims > 3: + raise ValueError("seeded_uniform only supports up to 3D tensors") + + if out is None: + out = torch.empty(*size, + dtype=dtype, + device=device, + pin_memory=pin_memory) + elif out.shape != size: + raise ValueError("shape of out and size must be the same") + + if n_dims == 3: + n_rows, n_3d, n_cols = out.shape + stride_row = out.stride(0) + stride_3d = out.stride(1) + elif n_dims == 2: + n_rows, n_cols = out.shape + n_3d = 1 + stride_row = out.stride(0) + stride_3d = 1 + else: + n_cols = out.shape[0] + n_rows = 1 + n_3d = 1 + stride_row = 1 + stride_3d = 1 + + if seeds.ndim != 1: + raise ValueError("seeds must be a 1D tensor") + + if seeds.numel() != n_rows: + raise ValueError( + "seeds must have the same number of elements as out has rows") + + # The philox PRNG Triton uses generates 4 random numbers at once. + # Therefore, the most efficient use of it is to divide the + # block size by 4, and then save the generated random numbers to + # each of the 4 slices of the tensor. + full_block_size = triton.next_power_of_2(n_cols) + philox_block_size = max(full_block_size // 4, 1) + n_slices = full_block_size // philox_block_size + num_warps = 4 + # Manual tuning. This seems to give best performance on A100 for + # simple kernels like this. + if philox_block_size >= 8192: + num_warps = 32 + elif philox_block_size >= 4096: + num_warps = 16 + elif philox_block_size >= 2048: + num_warps = 8 + + _seeded_uniform_triton[(n_rows, n_3d)]( + out, + seeds, + stride_row, + stride_3d, + seeds.stride(0), + n_rows, + n_3d, + n_cols, + n_slices=n_slices, + num_warps=num_warps, + block_size=philox_block_size, + ) + return out + + +@triton.jit +def _seeded_uniform_triton( + out_ptr: torch.Tensor, + seed_ptr: torch.Tensor, + out_row_stride: int, + out_3d_stride: int, + seed_row_stride: int, + n_rows: int, + n_3d: int, + n_cols: int, + n_slices: tl.constexpr, + block_size: tl.constexpr, +): + """ + Generate a random float32 number in [0, 1) for each element in the output + tensor. The random numbers in a row generated using the seed for that row. + + Args: + out_ptr: The output tensor. + seed_ptr: The per-row seeds to use for random number generation. + out_row_stride: The stride between rows of the output tensor. + out_3d_stride: The stride between 3D slices of the output tensor. + seed_row_stride: The stride between rows of the seed tensor. + n_rows: The number of rows in the output tensor. + n_3d: The size of second dimension of the output tensor, + if output tensor is 3D. + n_cols: The number of columns in the output tensor. + n_slices: The number of philox outputs to use. + """ + tl.static_assert(n_slices > 0 and n_slices <= 4, "0 < n_slices <= 4") + + # Get the row index. + row_idx = tl.program_id(axis=0) + three_d_idx = tl.program_id(axis=1) + + philox_offsets = tl.arange(0, block_size) + # Get the seed for the current element. + seed = tl.load(seed_ptr + row_idx * seed_row_stride) + if three_d_idx > 0: + seed ^= three_d_idx + # Generate random numbers in [0, 1). 
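    # tl.rand4x yields four outputs per offset from the Philox generator, so a
    # single call can fill up to four block_size-wide slices of the row (see
    # the n_slices computation in seeded_uniform above).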
+ out1, out2, out3, out4 = tl.rand4x(seed, philox_offsets) + + output_row_start_ptr = (out_ptr + row_idx * out_row_stride + + three_d_idx * out_3d_stride) + out1_offsets = philox_offsets + tl.store(output_row_start_ptr + out1_offsets, + out1, + mask=out1_offsets < n_cols) + if n_slices > 1: + out2_offsets = tl.arange(block_size, block_size * 2) + tl.store(output_row_start_ptr + out2_offsets, + out2, + mask=out2_offsets < n_cols) + if n_slices > 2: + out3_offsets = tl.arange(block_size * 2, block_size * 3) + tl.store(output_row_start_ptr + out3_offsets, + out3, + mask=out3_offsets < n_cols) + if n_slices > 3: + out4_offsets = tl.arange(block_size * 3, block_size * 4) + tl.store(output_row_start_ptr + out4_offsets, + out4, + mask=out4_offsets < n_cols) diff --git a/vllm/model_executor/layers/ops/sample.py b/vllm/model_executor/layers/ops/sample.py new file mode 100644 index 0000000000000..0077317282204 --- /dev/null +++ b/vllm/model_executor/layers/ops/sample.py @@ -0,0 +1,405 @@ +import math +from typing import Tuple, Optional + +import torch +import triton +import triton.language as tl + +from vllm.model_executor.layers.ops.rand import seeded_uniform + +_EPS = 1e-6 + +# This is a hardcoded limit in Triton (max block size). +MAX_TRITON_N_COLS = 131072 + + +def get_num_triton_sampler_splits(n_cols: int) -> int: + """Get the number of splits to use for Triton sampling. + + Triton has a limit on the number of columns it can handle, so we need to + split the tensor and call the kernel multiple times if it's too large. + """ + return math.ceil(n_cols / MAX_TRITON_N_COLS) + + +def _multi_split_sample( + probs: torch.Tensor, + seeds: torch.Tensor, + n_splits: int, + sampled_tokens_size: Tuple[int, int], + sampled_logprobs_size: Tuple[int, int], + sample_indices: torch.Tensor, + *, + logprobs: Optional[torch.Tensor] = None, + modify_greedy_probs: bool = False, + save_logprobs: bool = False, +): + """Sample tokens where vocab size is split into multiple parts + (too large for Triton otherwise).""" + assert seeds.ndim == 2 and seeds.shape[0] == n_splits + split_probs = probs.tensor_split(n_splits, 1) + split_logprobs = logprobs.tensor_split(n_splits, 1) + sampled_tokens_tmp = [ + torch.empty(sampled_tokens_size, dtype=torch.long, device=probs.device) + for _ in range(n_splits) + ] + sampled_logprobs_tmp = [ + torch.empty(sampled_logprobs_size, + dtype=probs.dtype, + device=probs.device) for _ in range(n_splits) + ] + # We are purposefuly using sampled_tokens_size as we need to always + # save modified probs in this case. + sampled_modified_probs_tmp = [ + torch.empty(sampled_tokens_size, + dtype=probs.dtype, + device=probs.device) for _ in range(n_splits) + ] + for i in range(n_splits): + n_samples = sample_indices.shape[0] + n_cols = split_probs[i].shape[1] + n_best = sampled_tokens_tmp[i].shape[1] + uniform_noise = seeded_uniform(n_samples, + n_best, + n_cols, + seeds=seeds[i].flatten(), + device=split_probs[i].device, + dtype=split_probs[i].dtype) + # TODO(yard1): See if we can remove the contiguous() calls. + # Will need kernel support. 
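        # Each vocab split is sampled on its own here; the per-split winners
        # are merged after the loop by taking the argmax over the
        # noise-modified probabilities and gathering the matching token ids
        # (and logprobs).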
+ _sample( + split_probs[i].contiguous(), + split_logprobs[i].contiguous(), + sample_indices, + sampled_tokens_tmp[i], + sampled_logprobs_tmp[i], + sampled_modified_probs_tmp[i], + seeds[i], + uniform_noise, + modify_greedy_probs=False, + save_logprobs=save_logprobs, + save_modified_probs=True, + ) + if i > 0: + # Add offset to sampled tokens + sampled_tokens_tmp[i].add_(i * split_probs[i - 1].shape[1]) + sampled_tokens = torch.stack(sampled_tokens_tmp) + sampled_modified_probs = torch.stack(sampled_modified_probs_tmp) + # Reduce the results from the splits. + sampled_modified_probs, indices = torch.max(sampled_modified_probs, + dim=0, + keepdim=True) + sampled_tokens = sampled_tokens.gather(0, indices).squeeze(0) + if save_logprobs: + sampled_logprobs = torch.stack(sampled_logprobs_tmp) + sampled_logprobs = sampled_logprobs.gather(0, indices).squeeze(0) + else: + sampled_logprobs = None + sampled_modified_probs = sampled_modified_probs.squeeze(0) + + if modify_greedy_probs: + # We need to modify the greedy probs for the sampled tokens. + # We can't do this in the kernel as we need to know the + # sampled tokens. + probs.fill_(0.0) + probs.scatter_(1, sampled_tokens, 1.0) + + return (sampled_tokens, sampled_logprobs, sampled_modified_probs) + + +def sample( + probs: torch.Tensor, + seeds: torch.Tensor, + *, + max_best_of: int = 1, + sample_indices: Optional[torch.Tensor] = None, + logprobs: Optional[torch.Tensor] = None, + modify_greedy_probs: bool = False, + save_logprobs: bool = False, + _save_modified_probs: bool = False, # pylint: disable=invalid-name +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]: + """Sample tokens from probs. with per-sequence seeds. + + Can sample from a subset of sequences through sample_indices. + + Args: + probs: Probabilities to sample from. + shape = [batch_size, vocab_size] + seeds: Per-sequence seed values. + shape = [n, math.ceil(vocab_size / MAX_TRITON_N_COLS)] + max_best_of: Number of samples to generate per sequence. + Sequence seed will be incremented by 1 each time. + sample_indices: Indices of sequences to sample from. + If not provided, will sample from all sequences. + shape = [n] + logprobs: Log-probabilities of the sampled tokens. + Only used for saving the logprobs if save_logprobs is True. + shape = [batch_size, vocab_size] + modify_greedy_probs: Whether to modify the greedy probabilities + for speculative sampling (sampled token = 1.0, + everything else = 0.0). + save_logprobs: Whether to save the log-probabilities of the + sampled tokens to a tensor. + _save_modified_probs: Whether to save the modified probabilities + (including gumbel noise) of the sampled tokens to a tensor. + DOES NOT include the modification done by modify_greedy_probs + (because we want to use the unmodified probs to pick the best + split in case of multi-split sampling). + This is exposed only for testing. 
+ + Returns: + sampled_tokens: shape = [n, max_best_of] + sampled_logprobs: shape = [n, max_best_of] if save_logprobs else None + sampled_modified_probs: shape = [n, max_best_of] + if save_modified_probs else None + """ + if sample_indices is None: + sample_indices = torch.arange(0, probs.shape[0], device=probs.device) + + sampled_tokens_size = (sample_indices.size(0), max_best_of) + if save_logprobs: + if logprobs is None: + raise ValueError( + "logprobs tensor must be provided if save_logprobs is True") + sampled_logprobs_size = sampled_tokens_size + else: + # Empty tensors to invoke the kernel + sampled_logprobs_size = (0, 0) + logprobs = probs + + if _save_modified_probs: + sampled_modified_probs_size = sampled_tokens_size + else: + # Empty tensors to invoke the kernel + sampled_modified_probs_size = (0, 0) + + # If the number of columns in probs is too large for Triton to handle, + # we split the tensor and sample from each split separately, and then + # do an argmax+gather to combine the results. + n_splits = get_num_triton_sampler_splits(probs.shape[1]) + if n_splits > 1: + (sampled_tokens, sampled_logprobs, + sampled_modified_probs) = _multi_split_sample( + probs, + seeds, + n_splits, + sampled_tokens_size, + sampled_logprobs_size, + sample_indices, + logprobs=logprobs, + modify_greedy_probs=modify_greedy_probs, + save_logprobs=save_logprobs) + else: + sampled_tokens = torch.empty(sampled_tokens_size, + dtype=torch.long, + device=probs.device) + sampled_logprobs = torch.empty(sampled_logprobs_size, + dtype=probs.dtype, + device=probs.device) + sampled_modified_probs = torch.empty(sampled_modified_probs_size, + dtype=probs.dtype, + device=probs.device) + n_samples = sample_indices.shape[0] + n_cols = probs.shape[1] + uniform_noise = seeded_uniform(n_samples, + max_best_of, + n_cols, + seeds=seeds.flatten(), + device=probs.device, + dtype=probs.dtype) + + _sample( + probs, + logprobs, + sample_indices, + sampled_tokens, + sampled_logprobs, + sampled_modified_probs, + seeds, + uniform_noise, + modify_greedy_probs=modify_greedy_probs, + save_logprobs=save_logprobs, + save_modified_probs=_save_modified_probs, + ) + return (sampled_tokens, sampled_logprobs if save_logprobs else None, + sampled_modified_probs if _save_modified_probs else None) + + +def _sample(probs: torch.Tensor, + logprobs: torch.Tensor, + sample_indices: torch.Tensor, + output_samples: torch.Tensor, + output_logprobs: torch.Tensor, + output_modified_probs: torch.Tensor, + seeds: torch.Tensor, + uniform_noise: torch.Tensor, + *, + modify_greedy_probs: bool = False, + save_logprobs: bool = True, + save_modified_probs: bool = False) -> torch.Tensor: + """Sample tokens from probs. + + Args: + probs [batch_size, vocab_size]: probs to sample from. + logprobs [batch_size, vocab_size]: logprobs (used when + save_logprobsis True). + sample_indices [n]: Indices of the samples to use for each row of probs. + output_samples [n, n_best]: Output tensor to store samples in. + output_logprobs [n, n_best]: Output tensor to store logprobs in. + output_modified_probs [n, n_best]: Output tensor to store + probs of chosen tokens in (modified with noise). + seeds [n]: Seeds to use for sampling. If the seed is 0, we use + greedy sampling. Note this is ONLY used for determining + whether to use random sampling or not. The actual random + noise should be passed as uniform_noise. + uniform_noise [batch_size, n_best, vocab_size]: Uniform + noise to use for random sampling (will be converted + to exponential gumbel noise by the kernel). 
+ modify_greedy_probs: If True, we modify the probs tensor in-place + to encode the sampling method used for each row. This is used + in speculative decoding. Only applies in greedy decoding. + save_logprobs: If True, we save the logprobs of the sampled tokens + in the output_logprobs tensor. + save_modified_probs: If True, we save the modified probs (with noise) + of the sampled tokens in the output_modified_probs tensor. + DOES NOT include the modification done by modify_greedy_probs + (because we want to use the unmodified probs to pick the best + split in case of multi-split sampling). + """ + n_samples = sample_indices.shape[0] + n_cols = probs.shape[1] + n_best = output_samples.shape[1] if len(output_samples.shape) > 1 else 1 + + # The block size is the smallest power of two greater than the number of + # columns in probs + block_size = triton.next_power_of_2(n_cols) + num_warps = 4 + # Manual tuning. This seems to give best performance on A100 for + # simple kernels like this. + if block_size >= 8192: + num_warps = 32 + elif block_size >= 4096: + num_warps = 16 + elif block_size >= 2048: + num_warps = 8 + + # Enqueue kernel. The 1D launch grid is simple: we have one kernel + # instance per row of the probs matrix + _sample_triton[(n_samples, n_best)]( + sample_indices, + output_samples, + output_logprobs, + output_modified_probs, + probs, + logprobs, + seeds, + uniform_noise, + output_samples.stride(0), + probs.stride(0), + uniform_noise.stride(0), + uniform_noise.stride(1) if n_best > 1 else 1, + n_samples, + n_cols, + n_best, + num_warps=num_warps, + block_size=block_size, + modify_greedy_probs=modify_greedy_probs, + save_logprobs=save_logprobs, + save_modified_probs=save_modified_probs, + ) + return output_samples, output_logprobs, output_modified_probs + + +@triton.jit +def _uniform_to_exponential(uniform_noise): + """Convert uniform samples to exponential samples.""" + # tl.rand returns values in [0, 1), so we clamp lower bound + # to _EPS to avoid log(0) and thus division by 0 later + lb = tl.full(uniform_noise.shape, _EPS, uniform_noise.dtype) + uniform_noise = tl.maximum(uniform_noise, lb) + # Use the inversion method to turn uniform samples + # into exponential samples + exponential_noise = -tl.log(uniform_noise) + return exponential_noise + + +@triton.jit +def _sample_triton( + sample_indices_ptr: torch.Tensor, output_ptr: torch.Tensor, + output_logprobs_ptr: torch.Tensor, + output_modified_probs_ptr: torch.Tensor, probs_ptr: torch.Tensor, + logprobs_ptr: torch.Tensor, seeds_ptr: torch.Tensor, + uniform_noise_ptr: torch.Tensor, output_row_stride: int, + probs_row_stride: int, uniform_noise_row_stride: int, + uniform_noise_best_stride: int, n_samples: int, n_cols: int, + n_best: int, block_size: tl.constexpr, + modify_greedy_probs: tl.constexpr, save_logprobs: tl.constexpr, + save_modified_probs: tl.constexpr): + # The rows are independent, so we parallelize across those + sample_idx = tl.program_id(0) + best_idx = tl.program_id(1) + + # Load the row index from DRAM + row_idx = tl.load(sample_indices_ptr + sample_idx) + seed = tl.load(seeds_ptr + sample_idx) + uses_random_sampling = seed != 0 + + # The stride represents how much we need to increase the + # pointer to advance 1 row + row_start_ptr = probs_ptr + row_idx * probs_row_stride + + # The block size is the next power of two greater than n_cols, + # so we can fit each row in a single block + col_offsets = tl.arange(0, block_size) + + # Load the row into SRAM, using a mask since block_size may be > than n_cols + row = 
tl.load(row_start_ptr + col_offsets, + mask=col_offsets < n_cols, + other=float("-inf")) + + if uses_random_sampling: + uniform_noise_start_ptr = (uniform_noise_ptr + + sample_idx * uniform_noise_row_stride + + best_idx * uniform_noise_best_stride) + uniform_noise = tl.load(uniform_noise_start_ptr + col_offsets, + mask=col_offsets < n_cols, + other=0.5) + exponential_noise = _uniform_to_exponential(uniform_noise) + row /= exponential_noise + + sampled_value, sampled_token = tl.max(row, axis=0, return_indices=True) + # clamp sampled token to n_cols - 1 + # this should not be necessary, but we do it + # just in case + if sampled_token >= n_cols: + sampled_token = n_cols - 1 + # Write back output to DRAM + output_row_start_ptr = (output_ptr + sample_idx * output_row_stride + + best_idx) + tl.store(output_row_start_ptr, sampled_token) + + if modify_greedy_probs: # noqa + if not uses_random_sampling: + # Set the probability of the sampled token to 1, all other + # tokens to zero. This is used in speculative decoding where + # the sampling method must be encoded within the sampled + # probability distributions. + row = tl.where(col_offsets == sampled_token, 1.0, 0.0) + tl.store(row_start_ptr + col_offsets, + row, + mask=col_offsets < n_cols) + + if save_modified_probs: + output_row_start_ptr = (output_modified_probs_ptr + + sample_idx * output_row_stride + best_idx) + tl.store(output_row_start_ptr, sampled_value) + + if save_logprobs: + # Load the row into SRAM, using a mask since block_size + # may be > than n_cols + sampled_logprob = tl.load(logprobs_ptr + row_idx * probs_row_stride + + sampled_token) + # Write back output to DRAM + output_row_start_ptr = (output_logprobs_ptr + + sample_idx * output_row_stride + best_idx) + tl.store(output_row_start_ptr, sampled_logprob) diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 4377b845df628..1fab1e734e1d7 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -12,6 +12,7 @@ from vllm.sequence import (Logprob, PromptLogprobs, SampleLogprobs, SamplerOutput, SequenceData, SequenceGroupOutput, SequenceOutput) +from vllm.model_executor.layers.ops.sample import (sample as sample_triton) from vllm.utils import is_neuron @@ -114,7 +115,8 @@ def forward( logprobs = torch.log_softmax(logits, dim=-1, dtype=torch.float) # Sample the next tokens. - sample_results = _sample(probs, logprobs, sampling_metadata) + sample_results = _sample(probs, logprobs, sampling_metadata, + sampling_tensors) # Get the logprobs query results. prompt_logprobs, sample_logprobs = _get_logprobs( logprobs, sampling_metadata, sample_results) @@ -375,7 +377,7 @@ def _multinomial( return probs.div_(q).argmax(dim=1).view(-1, num_samples) -def _sample( +def _sample_with_torch( probs: torch.Tensor, logprobs: torch.Tensor, sampling_metadata: SamplingMetadata, @@ -394,7 +396,7 @@ def _sample( # Counterintiutively, having two loops here is actually faster. # The first loop can run without waiting on GPU<->CPU sync. 
for sampling_type in SamplingType: - sample_indices = categorized_sample_indices[sampling_type] + sample_indices = categorized_sample_indices[sampling_type][:, 0] num_tokens = len(sample_indices) if num_tokens == 0: continue @@ -407,17 +409,19 @@ def _sample( greedy_samples = torch.argmax(logprobs[sample_indices.long()], dim=-1) elif sampling_type in (SamplingType.RANDOM, SamplingType.RANDOM_SEED): - max_best_of = 1 + max_best_of_in_batch = 1 for seq_group, is_prompt in zip(seq_groups, is_prompts): if is_prompt: _, sampling_params = seq_group - max_best_of = max(max_best_of, sampling_params.best_of) + max_best_of_in_batch = max(max_best_of_in_batch, + sampling_params.best_of) seeded_args = {} if sampling_type == SamplingType.RANDOM else { "seq_groups": seq_groups, "generators": sampling_metadata.generators, } multinomial_samples[sampling_type] = _multinomial( - probs[sample_indices.long()], max_best_of, **seeded_args) + probs[sample_indices.long()], max_best_of_in_batch, + **seeded_args) elif sampling_type == SamplingType.BEAM: beam_search_logprobs = logprobs[sample_indices] else: @@ -448,6 +452,99 @@ def _sample( return sample_results +def _sample_with_triton_kernel( + probs: torch.Tensor, + logprobs: torch.Tensor, + sampling_metadata: SamplingMetadata, + sampling_tensors: SamplingTensors, +) -> List[Tuple[List[int], List[int]]]: + categorized_seq_group_ids = {t: [] for t in SamplingType} + categorized_sample_indices = sampling_metadata.categorized_sample_indices + for i, seq_group in enumerate(sampling_metadata.seq_groups): + _, sampling_params = seq_group + sampling_type = sampling_params.sampling_type + categorized_seq_group_ids[sampling_type].append(i) + + sample_results_dict: Dict[int, Tuple[List[int], List[int]]] = {} + sample_metadata = {} + max_best_of_in_batch = 1 + + # Counterintiutively, having two loops here is actually faster. + # The first loop can run without waiting on GPU<->CPU sync. + for sampling_type in SamplingType: + sample_indices = categorized_sample_indices[sampling_type][:, 0] + sampled_token_indices = categorized_sample_indices[sampling_type][:, 1] + num_tokens = len(sample_indices) + if num_tokens == 0: + continue + seq_group_ids = categorized_seq_group_ids[sampling_type] + seq_groups = [sampling_metadata.seq_groups[i] for i in seq_group_ids] + is_prompts = [i < sampling_metadata.num_prompts for i in seq_group_ids] + sample_metadata[sampling_type] = (seq_group_ids, seq_groups, + is_prompts, sample_indices, + sampled_token_indices) + if sampling_type in (SamplingType.GREEDY, SamplingType.RANDOM, + SamplingType.RANDOM_SEED): + for seq_group, is_prompt in zip(seq_groups, is_prompts): + if is_prompt: + _, sampling_params = seq_group + max_best_of_in_batch = max(max_best_of_in_batch, + sampling_params.best_of) + elif sampling_type == SamplingType.BEAM: + beam_search_logprobs = logprobs[sample_indices] + else: + raise ValueError(f"Unsupported sampling type: {sampling_type}") + + sampled_tokens, _, _ = sample_triton( + probs=probs, + seeds=sampling_tensors.sampling_seeds, + max_best_of=max_best_of_in_batch, + sample_indices=sampling_tensors.sample_indices, + logprobs=logprobs, + # don't save logprobs because we have logic for that below + # TODO: use this instead of the CPU-based logic below + save_logprobs=False, + ) + + # GPU<->CPU sync happens in the loop below. 
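    # i.e. nothing is materialized on the host until the Python-side sample
    # helpers below read the sampled token ids.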
+ + for sampling_type in SamplingType: + if sampling_type not in sample_metadata: + continue + (seq_group_ids, seq_groups, is_prompts, sample_indices, + sampled_token_indices) = sample_metadata[sampling_type] + if sampling_type == SamplingType.GREEDY: + sample_results = _greedy_sample( + seq_groups, sampled_tokens[sampled_token_indices][:, 0]) + elif sampling_type in (SamplingType.RANDOM, SamplingType.RANDOM_SEED): + sample_results = _random_sample( + seq_groups, is_prompts, sampled_tokens[sampled_token_indices]) + elif sampling_type == SamplingType.BEAM: + sample_results = _beam_search_sample(seq_groups, is_prompts, + sampling_metadata.seq_data, + beam_search_logprobs) + sample_results_dict.update(zip(seq_group_ids, sample_results)) + + sample_results = [ + sample_results_dict[i] + for i in range(len(sampling_metadata.seq_groups)) + ] + return sample_results + + +def _sample( + probs: torch.Tensor, + logprobs: torch.Tensor, + sampling_metadata: SamplingMetadata, + sampling_tensors: SamplingTensors, +) -> List[Tuple[List[int], List[int]]]: + return _sample_with_torch(probs, logprobs, sampling_metadata) + + # TODO: Enable once Triton kernel & associated code is faster. + # return _sample_with_triton_kernel(probs, logprobs, sampling_metadata, + # sampling_tensors) + + def _get_logprobs( logprobs: torch.Tensor, sampling_metadata: SamplingMetadata, diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index b23f0170a6ca5..7d08feb3fee1c 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -2,12 +2,16 @@ from typing import Dict, List, Optional, Tuple import torch +import random from vllm.sampling_params import SamplingParams, SamplingType from vllm.sequence import SequenceData from vllm.utils import in_wsl, is_neuron +from vllm.model_executor.layers.ops.sample import ( + get_num_triton_sampler_splits) _SAMPLING_EPS = 1e-5 +_SEED_0_REPLACEMENT = 3403598558 class SamplingMetadata: @@ -67,14 +71,28 @@ class SamplingTensors: presence_penalties: torch.Tensor frequency_penalties: torch.Tensor repetition_penalties: torch.Tensor + sampling_seeds: torch.Tensor + sample_indices: torch.Tensor + extra_seeds: Optional[torch.Tensor] prompt_tokens: torch.Tensor output_tokens: torch.Tensor @classmethod def from_sampling_metadata( - cls, sampling_metadata: "SamplingMetadata", vocab_size: int, - device: torch.device, - dtype: torch.dtype) -> Tuple["SamplingTensors", bool, bool, bool]: + cls, + sampling_metadata: "SamplingMetadata", + vocab_size: int, + device: torch.device, + dtype: torch.dtype, + *, + extra_seeds_to_generate: int = 0, + extra_entropy: Optional[Tuple[int, ...]] = None + ) -> Tuple["SamplingTensors", bool, bool, bool]: + """ + extra_seeds_to_generate: extra seeds to generate using the + user-defined seed for each sequence. + extra_entropy: extra entropy to use when generating seeds. + """ prompt_tokens: List[List[int]] = [] output_tokens: List[List[int]] = [] top_ks: List[int] = [] @@ -84,9 +102,18 @@ def from_sampling_metadata( presence_penalties: List[float] = [] frequency_penalties: List[float] = [] repetition_penalties: List[float] = [] + sampling_seeds: List[int] = [] + sample_indices: List[int] = [] + prompt_best_of: List[int] = [] do_penalties = False do_top_p_top_k = False do_min_p = False + + # We need one base seed per Triton slice. 
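        # get_num_triton_sampler_splits() is ceil(vocab_size / 131072), so a
        # 32k-vocab model needs a single base seed per sequence; callers may
        # request extra seeds on top via extra_seeds_to_generate.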
+ seeds_to_generate = (extra_seeds_to_generate + + get_num_triton_sampler_splits(vocab_size)) + + sample_indices_start_idx = 0 for i, seq_group in enumerate(sampling_metadata.seq_groups): seq_ids, sampling_params = seq_group temperature = sampling_params.temperature @@ -95,6 +122,10 @@ def from_sampling_metadata( r = sampling_params.repetition_penalty top_p = sampling_params.top_p min_p = sampling_params.min_p + seed = sampling_params.seed + + is_greedy = sampling_params.sampling_type == SamplingType.GREEDY + # k should not be greater than the vocab size. top_k = min(sampling_params.top_k, vocab_size) top_k = vocab_size if top_k == -1 else top_k @@ -112,6 +143,7 @@ def from_sampling_metadata( or abs(f) >= _SAMPLING_EPS or abs(r - 1.0) >= _SAMPLING_EPS): do_penalties = True + if (i < sampling_metadata.num_prompts and sampling_params.prompt_logprobs is not None): # For tokens in the prompt that we only need to get @@ -138,10 +170,34 @@ def from_sampling_metadata( frequency_penalties += [f] * len(seq_ids) repetition_penalties += [r] * len(seq_ids) + is_prompt = i < sampling_metadata.num_prompts + if is_prompt: + prompt_best_of.append(sampling_params.best_of) + prompt_len = sampling_metadata.prompt_lens[i] + + if sampling_params.prompt_logprobs is not None: + # NOTE: the sampling position is the last token + # in the prompt + sample_indices_start_idx += prompt_len - 1 + for seq_id in seq_ids: + seq_data = sampling_metadata.seq_data[seq_id] + extra_entropy = extra_entropy or () + seq_seeds = cls._get_sequence_seeds( + seed, + seq_data.get_len(), + *extra_entropy, + seq_id, + seeds_to_generate=seeds_to_generate, + is_greedy=is_greedy) + sampling_seeds.append(seq_seeds) + sample_indices.append(sample_indices_start_idx) + sample_indices_start_idx += 1 + sampling_tensors = SamplingTensors.from_lists( temperatures, top_ps, top_ks, min_ps, presence_penalties, - frequency_penalties, repetition_penalties, prompt_tokens, - output_tokens, vocab_size, device, dtype) + frequency_penalties, repetition_penalties, sampling_seeds, + sample_indices, prompt_tokens, output_tokens, vocab_size, + extra_seeds_to_generate, device, dtype) return (sampling_tensors, do_penalties, do_top_p_top_k, do_min_p) @classmethod @@ -150,9 +206,10 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], presence_penalties: List[float], frequency_penalties: List[float], repetition_penalties: List[float], + sampling_seeds: List[int], sample_indices: List[int], prompt_tokens: List[List[int]], output_tokens: List[List[int]], vocab_size: int, - device: torch.device, + extra_seeds_to_generate: int, device: torch.device, dtype: torch.dtype) -> "SamplingTensors": # Note that the performance will be very bad without # pinned memory. @@ -210,6 +267,12 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], dtype=torch.int, pin_memory=pin_memory, ) + sample_indices_t = torch.tensor( + sample_indices, + device="cpu", + dtype=torch.long, + pin_memory=pin_memory, + ) prompt_tensor = torch.tensor( prompt_padded_tokens, device="cpu", @@ -222,8 +285,28 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], dtype=torch.long, pin_memory=pin_memory, ) + # need to transpose and make contiguous to + # copy the tensor correctly. + # [batch_size, n_seeds] -> [n_seeds, batch_size] + sampling_seeds_t = torch.tensor( + sampling_seeds, + device="cpu", + dtype=torch.long, + pin_memory=pin_memory, + ).T.contiguous() + # Because the memory is pinned, we can do non-blocking # transfer to device. 
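A note on the pattern above: a non-blocking host-to-device copy only overlaps with other work when the source tensor is in pinned (page-locked) memory, which is why the CPU-side tensors are built with pin_memory first. A minimal, self-contained sketch (not part of the patch):

import torch

lens = [4, 6, 3]
cpu_t = torch.tensor(lens, dtype=torch.long, device="cpu",
                     pin_memory=torch.cuda.is_available())
if torch.cuda.is_available():
    gpu_t = cpu_t.to(device="cuda", non_blocking=True)  # returns without waiting for the copy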
+ + # How many seeds the sample operation itself will need. + num_base_seeds = sampling_seeds_t.shape[0] - extra_seeds_to_generate + sampling_seeds_gpu = sampling_seeds_t.to(device=device, + non_blocking=True) + extra_seeds_gpu = sampling_seeds_gpu[num_base_seeds:] + if not extra_seeds_gpu.numel(): + extra_seeds_gpu = None + sampling_seeds_gpu = sampling_seeds_gpu[:num_base_seeds] + return cls( temperatures=temperatures_t.to(device=device, non_blocking=True), top_ps=top_ps_t.to(device=device, non_blocking=True), @@ -237,4 +320,38 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], non_blocking=True), prompt_tokens=prompt_tensor.to(device=device, non_blocking=True), output_tokens=output_tensor.to(device=device, non_blocking=True), + sampling_seeds=sampling_seeds_gpu, + sample_indices=sample_indices_t.to(device=device, + non_blocking=True), + extra_seeds=extra_seeds_gpu, ) + + @staticmethod + def _get_sequence_seeds( + seed: int, + *extra_entropy: int, + seeds_to_generate: int, + is_greedy: bool, + ): + """Get `seeds_to_generate` child seeds from `seed` and extra entropy.""" + if not is_greedy: + if seed is None: + randint_fn = random.randint + else: + generator = random.Random(str((seed, ) + extra_entropy)) + randint_fn = generator.randint + lo, hi = torch.iinfo(torch.long).min, torch.iinfo(torch.long).max + # If the user/random sets seed = 0 but request should + # have sampling, we need to change it to something + # else. We use a constant in that case. + # This way we don't need to create and load a bool + # matrix in the sampling kernel, which reduces CPU + # overhead and latency. + seq_seeds = [ + randint_fn(lo, hi) or _SEED_0_REPLACEMENT + for _ in range(seeds_to_generate) + ] + else: + # For the kernel, seed == 0 means greedy decoding. 
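A sketch of the seeding scheme used here (the helper below is illustrative, not vLLM's implementation): child seeds are derived deterministically from the user seed plus per-sequence entropy, a drawn value of 0 is remapped to a fixed constant, and seed 0 itself is reserved to mean greedy decoding, as the surrounding code shows.

import random
import torch

def derive_seq_seeds(seed, *entropy, n_seeds, is_greedy=False):
    if is_greedy:
        return [0] * n_seeds  # 0 tells the kernel to decode greedily
    rng = random.Random(str((seed, ) + entropy)) if seed is not None else random
    lo, hi = torch.iinfo(torch.long).min, torch.iinfo(torch.long).max
    # Remap an unlucky draw of 0 so that 0 stays reserved for greedy decoding.
    return [rng.randint(lo, hi) or 3403598558 for _ in range(n_seeds)]

# Same user seed and entropy give the same child seeds, so sampling is reproducible.
assert derive_seq_seeds(1234, 7, 0, n_seeds=3) == derive_seq_seeds(1234, 7, 0, n_seeds=3)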
+ seq_seeds = [0] * seeds_to_generate + return seq_seeds diff --git a/vllm/sequence.py b/vllm/sequence.py index 4a002edaf580f..ff96dd306791c 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -242,6 +242,9 @@ def get_output_len(self) -> int: def get_token_ids(self) -> List[int]: return self.data.get_token_ids() + def get_prompt_token_ids(self) -> List[int]: + return self.data.get_prompt_token_ids() + def get_last_token_id(self) -> int: return self.data.get_last_token_id() diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 27213887ed265..7e25311fa2268 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -408,6 +408,7 @@ def _prepare_sample( selected_token_start_idx = 0 categorized_sample_indices = {t: [] for t in SamplingType} categorized_sample_indices_start_idx = 0 + categorized_sampled_token_indices_start_idx = 0 pin_memory = not self.in_wsl and not self.device_config.is_neuron max_subquery_len = max(subquery_lens) if subquery_lens else 1 @@ -425,9 +426,12 @@ def _prepare_sample( categorized_sample_indices_start_idx += subquery_len - 1 categorized_sample_indices[ - sampling_params.sampling_type].append( - categorized_sample_indices_start_idx) + sampling_params.sampling_type].append([ + categorized_sample_indices_start_idx, + categorized_sampled_token_indices_start_idx + ]) categorized_sample_indices_start_idx += 1 + categorized_sampled_token_indices_start_idx += 1 if sampling_params.prompt_logprobs is not None: selected_token_indices.extend( @@ -449,9 +453,17 @@ def _prepare_sample( categorized_sample_indices[ sampling_params.sampling_type].extend( - range(categorized_sample_indices_start_idx, - categorized_sample_indices_start_idx + num_seqs)) + zip( + range( + categorized_sample_indices_start_idx, + categorized_sample_indices_start_idx + + num_seqs), + range( + categorized_sampled_token_indices_start_idx, + categorized_sampled_token_indices_start_idx + + num_seqs))) categorized_sample_indices_start_idx += num_seqs + categorized_sampled_token_indices_start_idx += num_seqs if sampling_params.seed is not None: generators.append(seq_group_metadata.state.generator) @@ -459,12 +471,14 @@ def _prepare_sample( selected_token_indices = _async_h2d(selected_token_indices, dtype=torch.long, target_device=self.device, - pin_memory=pin_memory) + pin_memory=not self.in_wsl) + categorized_sample_indices = { - t: _async_h2d(seq_ids, - dtype=torch.int, - target_device=self.device, - pin_memory=pin_memory) + t: _maybe_expand_dim( + _async_h2d(seq_ids, + dtype=torch.int, + target_device=self.device, + pin_memory=pin_memory), 2, 2) for t, seq_ids in categorized_sample_indices.items() } @@ -884,3 +898,11 @@ def _async_h2d( ) -> torch.Tensor: t = torch.tensor(data, dtype=dtype, pin_memory=pin_memory, device="cpu") return t.to(device=target_device, non_blocking=True) + + +def _maybe_expand_dim(tensor: torch.Tensor, + target_dims: int, + size: int = 1) -> torch.Tensor: + if tensor.ndim < target_dims: + tensor = tensor.view(-1, *([size] * (target_dims - tensor.ndim))) + return tensor From 6e435de766c7749b214b637ac58570a221006c95 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Thu, 21 Mar 2024 06:46:05 +0900 Subject: [PATCH 151/196] [1/n][Chunked Prefill] Refactor input query shapes (#3236) --- .buildkite/test-pipeline.yaml | 4 +- .../test_basic_correctness.py | 4 +- tests/core/test_scheduler.py | 18 +- tests/lora/test_worker.py | 2 +- tests/spec_decode/test_multi_step_worker.py | 4 +- tests/worker/test_model_runner.py | 161 +++++++++++- vllm/config.py | 3 
- vllm/core/scheduler.py | 13 +- vllm/engine/arg_utils.py | 8 +- vllm/engine/llm_engine.py | 1 - vllm/model_executor/input_metadata.py | 82 +++++- vllm/model_executor/layers/activation.py | 4 +- .../layers/attention/attention.py | 3 +- .../layers/attention/backends/flash_attn.py | 46 +++- .../layers/attention/backends/xformers.py | 232 ++++++++++------- .../layers/attention/ops/paged_attn.py | 9 +- vllm/model_executor/layers/sampler.py | 1 - vllm/worker/model_runner.py | 239 +++++++++++------- 18 files changed, 575 insertions(+), 259 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 6ae351130f203..17f4c33670821 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -47,7 +47,7 @@ steps: - pytest -v -s prefix_caching - label: Samplers Test - command: pytest -v -s samplers --forked + command: pytest -v -s samplers - label: Worker Test command: pytest -v -s worker @@ -56,7 +56,7 @@ steps: command: pytest -v -s spec_decode - label: LoRA Test %N - command: pytest -v -s lora --forked --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 - label: Metrics Test diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index fe67e0f2f4808..da0176306b4ee 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -13,6 +13,7 @@ @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["half"]) @pytest.mark.parametrize("max_tokens", [5]) +@pytest.mark.parametrize("enforce_eager", [False, True]) def test_models( hf_runner, vllm_runner, @@ -20,12 +21,13 @@ def test_models( model: str, dtype: str, max_tokens: int, + enforce_eager: bool, ) -> None: hf_model = hf_runner(model, dtype=dtype) hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) del hf_model - vllm_model = vllm_runner(model, dtype=dtype) + vllm_model = vllm_runner(model, dtype=dtype, enforce_eager=enforce_eager) vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) del vllm_model diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index ebfeb8ba04812..397101fa86104 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -10,7 +10,7 @@ def test_scheduler_add_seq_group(): block_size = 4 - scheduler_config = SchedulerConfig(100, 64, 1, 256) + scheduler_config = SchedulerConfig(100, 64, 1) cache_config = CacheConfig(block_size, 1.0, 1, "auto") cache_config.num_cpu_blocks = 4 cache_config.num_gpu_blocks = 4 @@ -26,7 +26,7 @@ def test_scheduler_add_seq_group(): def test_scheduler_abort_seq_group(): block_size = 4 - scheduler_config = SchedulerConfig(100, 64, 1, 256) + scheduler_config = SchedulerConfig(100, 64, 1) cache_config = CacheConfig(block_size, 1.0, 1, "auto") cache_config.num_cpu_blocks = 4 cache_config.num_gpu_blocks = 4 @@ -50,7 +50,7 @@ def test_scheduler_schedule_simple(): block_size = 4 num_seq_group = 4 max_model_len = 16 - scheduler_config = SchedulerConfig(64, num_seq_group, max_model_len, 256) + scheduler_config = SchedulerConfig(64, num_seq_group, max_model_len) cache_config = CacheConfig(block_size, 1.0, 1, "auto") cache_config.num_cpu_blocks = 8 cache_config.num_gpu_blocks = 8 @@ -64,10 +64,10 @@ def test_scheduler_schedule_simple(): running.append(seq_group) # Schedule seq groups prompts. 
+ num_tokens = block_size * num_seq_group seq_group_meta, out = scheduler.schedule() assert set(out.scheduled_seq_groups) == set(running) - assert out.num_batched_tokens == num_seq_group * seq_group.get_seqs( - )[0].get_len() + assert out.num_batched_tokens == num_tokens assert (not out.blocks_to_copy and not out.blocks_to_swap_in and not out.blocks_to_swap_out) assert len(seq_group_meta) == num_seq_group @@ -84,7 +84,7 @@ def test_scheduler_schedule_simple(): def test_scheduler_schedule_preempt_abort(): block_size = 4 max_model_len = 16 - scheduler_config = SchedulerConfig(64, 2, max_model_len, 256) + scheduler_config = SchedulerConfig(64, 2, max_model_len) cache_config = CacheConfig(block_size, 1.0, 1, "auto") cache_config.num_cpu_blocks = 2 cache_config.num_gpu_blocks = 2 @@ -99,7 +99,7 @@ def test_scheduler_schedule_preempt_abort(): # Schedule seq groups prompts. seq_group_meta, out = scheduler.schedule() assert out.scheduled_seq_groups == [seq_group_a, seq_group_b] - assert out.num_batched_tokens == seq_group_a.get_seqs()[0].get_len() * 2 + assert out.num_batched_tokens == block_size * 2 # seq_a and seq_b assert (not out.blocks_to_copy and not out.blocks_to_swap_in and not out.blocks_to_swap_out) assert len(seq_group_meta) == 2 @@ -124,7 +124,7 @@ def test_scheduler_schedule_preempt_abort(): scheduler.abort_seq_group("1") seq_group_meta, out = scheduler.schedule() assert out.scheduled_seq_groups == [seq_group_b] - assert out.num_batched_tokens == seq_group_b.get_seqs()[0].get_len() + assert out.num_batched_tokens == 5 # 4 prompt + 1 generation. assert (not out.blocks_to_copy and not out.blocks_to_swap_in and not out.blocks_to_swap_out) assert len(seq_group_meta) == 1 @@ -136,7 +136,7 @@ def test_scheduler_max_seqs(): num_seq_group = 4 max_seq_group = 2 max_model_len = 16 - scheduler_config = SchedulerConfig(64, max_seq_group, max_model_len, 256) + scheduler_config = SchedulerConfig(64, max_seq_group, max_model_len) cache_config = CacheConfig(block_size, 1.0, 1, "auto") cache_config.num_cpu_blocks = 8 cache_config.num_gpu_blocks = 8 diff --git a/tests/lora/test_worker.py b/tests/lora/test_worker.py index 31a7c716afbf2..e4538de35169b 100644 --- a/tests/lora/test_worker.py +++ b/tests/lora/test_worker.py @@ -25,7 +25,7 @@ def test_worker_apply_lora(sql_lora_files): revision=None, ), parallel_config=ParallelConfig(1, 1, False), - scheduler_config=SchedulerConfig(32, 32, 32, 256), + scheduler_config=SchedulerConfig(32, 32, 32), device_config=DeviceConfig("cuda"), local_rank=0, rank=0, diff --git a/tests/spec_decode/test_multi_step_worker.py b/tests/spec_decode/test_multi_step_worker.py index 45b43ec59ee8f..5f788549d44d0 100644 --- a/tests/spec_decode/test_multi_step_worker.py +++ b/tests/spec_decode/test_multi_step_worker.py @@ -92,8 +92,8 @@ def test_same_output_for_single_step(): num_gpu_blocks, seed, ) - multi_step_worker.model_runner = worker.model_runner - multi_step_worker.cache_engine = worker.cache_engine + # multi_step_worker.model_runner = worker.model_runner + # multi_step_worker.cache_engine = worker.cache_engine num_steps = 1 diff --git a/tests/worker/test_model_runner.py b/tests/worker/test_model_runner.py index f44895a728c7e..44b22c2bd8a21 100644 --- a/tests/worker/test_model_runner.py +++ b/tests/worker/test_model_runner.py @@ -1,8 +1,13 @@ import random import torch +from vllm.config import ModelConfig from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata -from vllm.worker.model_runner import ModelRunner +from vllm.worker.model_runner import 
ModelRunner, _BATCH_SIZE_ALIGNMENT + + +def get_aligned_size(batch_size: int, alignment: int): + return ((batch_size + alignment - 1) // alignment * alignment) def test_prepare_prompt(): @@ -12,6 +17,7 @@ def test_prepare_prompt(): batch_size = random.randint(1, 256) prompt_lens = [] seq_group_metadata_list = [] + block_tables = {0: [1]} for i in range(batch_size): # make sure all tokens fit into one block prompt_len = i % (model_runner.block_size - 1) + 1 @@ -23,26 +29,165 @@ def test_prepare_prompt(): is_prompt=True, seq_data={0: SequenceData(seq_data)}, sampling_params=SamplingParams(temperature=0), - block_tables={0: [1]}, + block_tables=block_tables, )) expected_selected_token_indices = [] selected_token_start_idx = 0 - max_seq_len = max(prompt_lens) for prompt_len in prompt_lens: expected_selected_token_indices.append(selected_token_start_idx + prompt_len - 1) - selected_token_start_idx += max_seq_len - input_tokens, input_positions, _, return_prompt_lens, _, _, _, _ = ( - model_runner._prepare_prompt(seq_group_metadata_list)) + selected_token_start_idx += prompt_len + (input_tokens, input_positions, input_metadata, return_prompt_lens, _, _, + _, _) = (model_runner._prepare_prompt(seq_group_metadata_list)) assert return_prompt_lens == prompt_lens + + # Verify input metadata is correct for prompts. + device = model_runner.device + assert input_metadata.is_prompt is True + assert torch.allclose(input_metadata.prompt_lens_tensor, + torch.tensor(prompt_lens, device=device)) + assert input_metadata.prompt_lens == prompt_lens + assert input_metadata.num_prompt_tokens == sum(prompt_lens) + assert input_metadata.num_generation_tokens == 0 + assert input_metadata.max_seq_len == max(prompt_lens) + + # Test subquery start locs. + start_idx = 0 + start_loc = [start_idx] + for prompt_len in prompt_lens: + start_idx += prompt_len + start_loc.append(start_idx) + assert torch.allclose( + input_metadata.subquery_start_loc, + torch.tensor(start_loc, dtype=torch.int32, device=device)) + + # Test seq start locs. Note that for normal prefill it is + # equivalent to subquery_start_loc. + start_idx = 0 + seq_start_loc = [start_idx] + for prompt_len in prompt_lens: + start_idx += prompt_len + seq_start_loc.append(start_idx) + + assert torch.allclose( + input_metadata.seq_start_loc, + torch.tensor(start_loc, dtype=torch.int32, device=device)) + assert input_metadata.max_context_len is None + assert torch.allclose( + input_metadata.context_lens, + torch.zeros(input_metadata.context_lens.shape[0], + dtype=torch.int, + device=device)) + + expected = torch.tensor([[] for _ in range(len(seq_group_metadata_list))], + dtype=torch.int32, + device=model_runner.device) + assert torch.allclose(input_metadata.block_tables, expected) + # Cuda graph should not be used for prerill. 
+ assert input_metadata.use_cuda_graph is False + assert input_metadata.kv_cache_dtype == "auto" + + assert input_tokens.shape == (sum(prompt_lens), ) + assert input_positions.shape == (sum(prompt_lens), ) + torch.testing.assert_close(input_tokens, input_positions) + sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list, prompt_lens, subquery_lens=prompt_lens) - assert input_tokens.shape == (batch_size, max_seq_len) - assert input_positions.shape == (batch_size, max_seq_len) + assert input_tokens.shape == (sum(prompt_lens), ) + assert input_positions.shape == (sum(prompt_lens), ) + actual = sampling_metadata.selected_token_indices + expected = torch.tensor(expected_selected_token_indices, + device=actual.device, + dtype=actual.dtype) + torch.testing.assert_close(actual, expected) + torch.testing.assert_close(input_tokens, input_positions) + + actual = sampling_metadata.selected_token_indices + expected = torch.tensor(expected_selected_token_indices, + device=actual.device, + dtype=actual.dtype) + torch.testing.assert_close(actual, expected) + + +def test_prepare_decode_cuda_graph(): + model_config = ModelConfig( + "facebook/opt-125m", + "facebook/opt-125m", + tokenizer_mode="auto", + trust_remote_code=False, + download_dir=None, + load_format="dummy", + seed=0, + dtype="float16", + revision=None, + enforce_eager=False, + ) + model_runner = ModelRunner(model_config, None, None, None, None) + model_runner.set_block_size(16) + + batch_size = random.randint(1, 256) + prompt_lens = [] + seq_group_metadata_list = [] + for i in range(batch_size): + # make sure all tokens fit into one block + prompt_len = i % (model_runner.block_size - 1) + 1 + prompt_lens.append(prompt_len) + seq_data = list(range(prompt_len)) + seq_group_metadata_list.append( + SequenceGroupMetadata( + request_id=f"test_{i}", + is_prompt=False, + seq_data={0: SequenceData(seq_data)}, + sampling_params=SamplingParams(temperature=0), + block_tables={0: [1]}, + )) + + input_tokens, input_positions, input_metadata, _, _, _ = ( + model_runner._prepare_decode(seq_group_metadata_list)) + + # Verify input metadata is correct for prompts. + device = model_runner.device + assert input_metadata.is_prompt is False + assert input_metadata.prompt_lens is None + assert input_metadata.num_prompt_tokens == 0 + assert input_metadata.num_generation_tokens == (get_aligned_size( + len(seq_group_metadata_list), _BATCH_SIZE_ALIGNMENT)) + assert input_metadata.max_seq_len is None + assert input_metadata.subquery_start_loc is None + assert input_metadata.seq_start_loc is None + assert input_metadata.max_context_len == max(prompt_lens) + assert torch.allclose( + input_metadata.context_lens[:len(prompt_lens)], + torch.tensor(prompt_lens, dtype=torch.int, device=device)) + + # block table's first index corresponds to each batch, meaning in + # decoding it is each token. + assert input_metadata.block_tables.shape[0] == len(input_tokens) + # Block table's second dim correspondsd to each token's block number. + # It is padded up to + assert input_metadata.block_tables.shape[1] == ( + model_runner.get_max_block_per_batch()) + # Cuda graph should not be used for prerill. 
+ assert input_metadata.use_cuda_graph is True + assert input_metadata.kv_cache_dtype == "auto" + + assert input_tokens.shape == (get_aligned_size( + len(seq_group_metadata_list), _BATCH_SIZE_ALIGNMENT), ) + assert input_positions.shape == (get_aligned_size( + len(seq_group_metadata_list), _BATCH_SIZE_ALIGNMENT), ) torch.testing.assert_close(input_tokens, input_positions) + # Verify Sampling + expected_selected_token_indices = [] + selected_token_start_idx = 0 + for prompt_len in prompt_lens: + expected_selected_token_indices.append(selected_token_start_idx) + selected_token_start_idx += 1 + sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list, + prompt_lens, + subquery_lens=prompt_lens) actual = sampling_metadata.selected_token_indices expected = torch.tensor(expected_selected_token_indices, device=actual.device, diff --git a/vllm/config.py b/vllm/config.py index 51ae66e2375ab..b769ecdce8808 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -535,7 +535,6 @@ class SchedulerConfig: iteration. max_model_len: Maximum length of a sequence (including prompt and generated text). - max_paddings: Maximum number of paddings to be added to a batch. """ def __init__( @@ -543,7 +542,6 @@ def __init__( max_num_batched_tokens: Optional[int], max_num_seqs: int, max_model_len: int, - max_paddings: int, ) -> None: if max_num_batched_tokens is not None: self.max_num_batched_tokens = max_num_batched_tokens @@ -553,7 +551,6 @@ def __init__( self.max_num_batched_tokens = max(max_model_len, 2048) self.max_num_seqs = max_num_seqs self.max_model_len = max_model_len - self.max_paddings = max_paddings self._verify_args() def _verify_args(self) -> None: diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index c3f93a2928df5..be55e8520a55f 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -173,12 +173,12 @@ def _schedule(self) -> SchedulerOutputs: curr_loras = set( seq_group.lora_int_id for seq_group in self.running) if self.lora_enabled else None - seq_lens: List[int] = [] # Optimization: We do not sort the waiting queue since the preempted # sequence groups are added to the front and the new sequence groups # are added to the back. leftover_waiting_sequences = deque() + num_batched_tokens = 0 while self.waiting: seq_group = self.waiting[0] waiting_seqs = seq_group.get_seqs( @@ -223,8 +223,7 @@ def _schedule(self) -> SchedulerOutputs: continue # If the number of batched tokens exceeds the limit, stop. 
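A small worked example of the accounting change made just below (illustrative, not from the patch): the old padded 2D layout charged every prompt in the batch at the longest prompt's length, while the flattened 1D layout charges only the tokens that actually exist.

prompt_lens = [3, 7, 2]
padded_tokens = len(prompt_lens) * max(prompt_lens)  # 21 with the old len * max accounting
flat_tokens = sum(prompt_lens)                       # 12 with the new sum-of-lengths accounting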
- new_seq_lens = seq_lens + [num_prompt_tokens] - num_batched_tokens = len(new_seq_lens) * max(new_seq_lens) + num_batched_tokens += num_prompt_tokens if (num_batched_tokens > self.scheduler_config.max_num_batched_tokens): break @@ -236,11 +235,6 @@ def _schedule(self) -> SchedulerOutputs: self.scheduler_config.max_num_seqs): break - num_paddings = num_batched_tokens - sum(new_seq_lens) - if num_paddings > self.scheduler_config.max_paddings: - break - seq_lens = new_seq_lens - if lora_int_id > 0: curr_loras.add(lora_int_id) self.waiting.popleft() @@ -255,8 +249,7 @@ def _schedule(self) -> SchedulerOutputs: scheduler_outputs = SchedulerOutputs( scheduled_seq_groups=scheduled, prompt_run=True, - num_batched_tokens=len(seq_lens) * - max(seq_lens) if seq_lens else 0, + num_batched_tokens=num_batched_tokens, blocks_to_swap_in=blocks_to_swap_in, blocks_to_swap_out=blocks_to_swap_out, blocks_to_copy=blocks_to_copy, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 3e146d2e6c0c4..94c80f4284067 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -31,7 +31,6 @@ class EngineArgs: gpu_memory_utilization: float = 0.90 max_num_batched_tokens: Optional[int] = None max_num_seqs: int = 256 - max_paddings: int = 256 max_logprobs: int = 5 # OpenAI default value disable_log_stats: bool = False revision: Optional[str] = None @@ -213,10 +212,6 @@ def add_cli_args( type=int, default=EngineArgs.max_num_seqs, help='maximum number of sequences per iteration') - parser.add_argument('--max-paddings', - type=int, - default=EngineArgs.max_paddings, - help='maximum number of paddings in a batch') parser.add_argument( '--max-logprobs', type=int, @@ -347,8 +342,7 @@ def create_engine_configs( ), self.ray_workers_use_nsight) scheduler_config = SchedulerConfig(self.max_num_batched_tokens, self.max_num_seqs, - model_config.max_model_len, - self.max_paddings) + model_config.max_model_len) lora_config = LoRAConfig( max_lora_rank=self.max_lora_rank, max_loras=self.max_loras, diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 71798ab7d17c0..2280481cca9cb 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -561,7 +561,6 @@ def _process_model_outputs( # Log stats. if self.log_stats: self.stat_logger.log(self._get_stats(scheduler_outputs)) - return request_outputs def step(self) -> List[RequestOutput]: diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py index 01bba70ac10a8..35245865fb1b1 100644 --- a/vllm/model_executor/input_metadata.py +++ b/vllm/model_executor/input_metadata.py @@ -1,36 +1,92 @@ from dataclasses import dataclass, fields -from typing import Optional, Any, Dict +from typing import Optional, List, Any, Dict import torch +from xformers.ops.fmha.attn_bias import AttentionBias @dataclass class InputMetadata: """Metadata for input sequences. Used in PagedAttention. - Args: - prompt_lens: Lengths of prompts. - slot_mapping: The address to write the new KV to of each token. - max_context_len: The maximum context length. - context_lens: the length of attention context for each sequence. - block_tables: The block tables. (Seq id -> list of physical block) - kv_cache_dtype: Data type to store kv cache. + NOTE: Any python object stored here is not updated when it is + cuda-graph replayed. If you have values that need to be changed + dynamically, it should be stored in tensor. The tensor has to be + updated from `CUDAGraphRunner.forward` API. 
""" - + # Currently, input sequences can only contain all prompts + # or all decoding. True if all sequences are prompts. is_prompt: bool + # (num_tokens,). The indices of the token slots that input tokens will be + # stored into. E.g., if `slot_mapping` is [35, 2, 17] and the block size + # is 16, the three tokens are stored in the 3rd slot in block 2, 2nd slot + # in block 0, and 1st slot in block 1, respectively. slot_mapping: torch.Tensor - prompt_lens: Optional[torch.Tensor] - max_seq_len: Optional[int] - start_loc: Optional[torch.Tensor] + # (batch_size,). The prompt length per sequence. None if it is a decoding. + prompt_lens: Optional[List[int]] + # prompt_lens stored as a tensor. + prompt_lens_tensor: Optional[torch.Tensor] + # The number of prompt tokens. Doesn't include padding. + num_prompt_tokens: int + # The number of generation tokens. Doesn't include padding. + num_generation_tokens: int + """ + Definition of context_len, subquery_len, and seqlen. + |---------- N-1 iteration --------| + |---------------- N iteration ---------------------| + |- tokenA -|......................|-- newTokens ---| + |---------- context_len ----------| + |-------------------- seqlen ----------------------| + |- subquery_len -| + + WARNING: context_len has different definition depending on if it is + prefill vs decoding. When it is prefill, it doesn't include new + tokens. When it is for decoding, it includes a new token. + """ + + # Maximum subquery length in the batch. + max_subquery_len: Optional[int] + # Maximum context length in the batch. max_context_len: Optional[int] + # FIXME: It is for flash attn. + # Maximum sequence length in the batch. + max_seq_len: Optional[int] + # (batch_size + 1,). The cumulative subquery lengths of the sequences in + # the batch, used to index into subquery. E.g., if the subquery length + # is [4, 6], it is [0, 4, 10]. + subquery_start_loc: Optional[torch.Tensor] + # FIXME: It is for flash attn. + # (batch_size + 1,). The cumulative sequence lengths of the sequences in + # the batch, used to index into sequence. E.g., if the sequence length is + # [4, 6], it is [0, 4, 10]. + seq_start_loc: Optional[torch.Tensor] + # (batch_size,). The length of context (tokens stored in KV cache) per + # sequence. WARNING: When it is a prefill request, it doesn't include new + # tokens. When it is for decoding, it includes a new token. context_lens: Optional[torch.Tensor] + # (batch_size, max_blocks_per_seq). + # Block addresses per sequence. (Seq id -> list of physical block) + # E.g., [0, 1, 2] means tokens are stored in 0th, 1st, and 2nd blocks + # in the kv cache. Each block can contain up to block_size tokens. + # 2nd dimensions are padded up to max_blocks_per_seq if it is cuda-graph + # captured. block_tables: Optional[torch.Tensor] + # Whether or not if cuda graph is enabled. + # Cuda-graph is currently enabled for decoding only. use_cuda_graph: bool kv_cache_dtype: str def __post_init__(self): + # Set during the execution of the first attention op. + # It is a list because it is needed to set per prompt + # when alibi slopes is used. It is because of the limitation + # from xformer API. # will not appear in the __repr__ and __init__ - self.attn_bias = None + self.attn_bias: Optional[List[AttentionBias]] = None + + # Cuda graph is only used for decoding now. 
+ if self.use_cuda_graph: + assert self.num_prompt_tokens == 0 def asdict_zerocopy(self) -> Dict[str, Any]: """Similar to dataclasses.asdict, but avoids deepcopying.""" diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py index 3eb73ee109f50..f569a5a49cbdf 100644 --- a/vllm/model_executor/layers/activation.py +++ b/vllm/model_executor/layers/activation.py @@ -20,8 +20,8 @@ class SiluAndMul(nn.Module): The function computes x -> silu(x[:d]) * x[d:] where d = x.shape[-1] // 2. Shapes: - x: (batch_size, seq_len, 2 * d) or (num_tokens, 2 * d) - return: (batch_size, seq_len, d) or (num_tokens, d) + x: (num_tokens, 2 * d) or (batch_size, seq_len, 2 * d) + return: (num_tokens, d) or (batch_size, seq_len, d) """ def _forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py index 4b63b9eaf59a7..ae598b029a007 100644 --- a/vllm/model_executor/layers/attention/attention.py +++ b/vllm/model_executor/layers/attention/attention.py @@ -17,11 +17,12 @@ class Attention(nn.Module): This class takes query, key, and value tensors as input. The input tensors can either contain prompt tokens or generation tokens. + The class does the following: 1. Store the input key and value tensors in the KV cache. 2. Perform (multi-head/multi-query/grouped-query) attention. - 3. Return the output tensor. + 3. Output the output tensor. """ def __init__( diff --git a/vllm/model_executor/layers/attention/backends/flash_attn.py b/vllm/model_executor/layers/attention/backends/flash_attn.py index 58ccd461b993e..9ce5851f3650d 100644 --- a/vllm/model_executor/layers/attention/backends/flash_attn.py +++ b/vllm/model_executor/layers/attention/backends/flash_attn.py @@ -1,7 +1,7 @@ """Attention layer with Flash and PagedAttention.""" from typing import List, Optional -from flash_attn import flash_attn_func +from flash_attn import flash_attn_varlen_func import torch from vllm.model_executor.input_metadata import InputMetadata @@ -10,6 +10,21 @@ class FlashAttentionBackend: + """ + If the input tensors contain prompt tokens, the layout is as follows: + |<--------------- num_prompt_tokens -------------->| + |<--prompt_0-->|<--prompt_1-->|...|<--prompt_N-1-->| + + Otherwise, the layout is as follows: + |<------------------ num_generation_tokens (M) ----------------->| + |<--generation_0-->|..........|<--generation_M-1-->|<--padding-->| + + Generation tokens can contain padding when cuda-graph is used. + Currently, prompt tokens don't contain any padding. + + The prompts might have different lengths, while the generation tokens + always have length 1. + """ def __init__( self, @@ -52,18 +67,18 @@ def forward( """Forward pass with FlashAttention and PagedAttention. Args: - query: shape = [batch_size, seq_len, num_heads * head_size] - key: shape = [batch_size, seq_len, num_kv_heads * head_size] - value: shape = [batch_size, seq_len, num_kv_heads * head_size] + query: shape = [num_tokens, num_heads * head_size] + key: shape = [num_tokens, num_kv_heads * head_size] + value: shape = [num_tokens, num_kv_heads * head_size] key_cache: shape = [num_blocks, num_kv_heads, head_size/x, block_size, x] value_cache: shape = [num_blocks, num_kv_heads, head_size, block_size] input_metadata: metadata for the inputs. 
Returns: - shape = [batch_size, seq_len, num_heads * head_size] + shape = [num_tokens, num_heads * head_size] """ - batch_size, seq_len, hidden_size = query.shape + num_tokens, hidden_size = query.shape # Reshape the query, key, and value tensors. query = query.view(-1, self.num_heads, self.head_size) key = key.view(-1, self.num_kv_heads, self.head_size) @@ -82,13 +97,16 @@ def forward( if (key_cache is None or value_cache is None or input_metadata.block_tables.numel() == 0): # normal attention - query = query.unflatten(0, (batch_size, seq_len)) - key = key.unflatten(0, (batch_size, seq_len)) - value = value.unflatten(0, (batch_size, seq_len)) - output = flash_attn_func( - query, - key, - value, + # When block_tables are not filled, it means q and k are the + # prompt, and they have the same length. + output = flash_attn_varlen_func( + q=query, + k=key, + v=value, + cu_seqlens_q=input_metadata.seq_start_loc, + cu_seqlens_k=input_metadata.seq_start_loc, + max_seqlen_q=input_metadata.max_seq_len, + max_seqlen_k=input_metadata.max_seq_len, softmax_scale=self.scale, causal=True, window_size=self.sliding_window, @@ -118,4 +136,4 @@ def forward( ) # Reshape the output tensor. - return output.view(batch_size, seq_len, hidden_size) + return output.view(num_tokens, hidden_size) diff --git a/vllm/model_executor/layers/attention/backends/xformers.py b/vllm/model_executor/layers/attention/backends/xformers.py index bad2a648b6703..f0ef9fac9aaa4 100644 --- a/vllm/model_executor/layers/attention/backends/xformers.py +++ b/vllm/model_executor/layers/attention/backends/xformers.py @@ -14,6 +14,21 @@ class XFormersBackend: + """ + If the input tensors contain prompt tokens, the layout is as follows: + |<--------------- num_prompt_tokens --------------->| + |<--prompt_0-->|<--prompt_1-->|...|<--prompt_N-1--->| + + Otherwise, the layout is as follows: + |<------------------ num_generation_tokens (M) ----------------->| + |<--generation_0-->|..........|<--generation_M-1-->|<--padding-->| + + Generation tokens can contain padding when cuda-graph is used. + Currently, prompt tokens don't contain any padding. + + The prompts might have different lengths, while the generation tokens + always have length 1. + """ def __init__( self, @@ -55,19 +70,18 @@ def forward( """Forward pass with xFormers and PagedAttention. Args: - query: shape = [batch_size, seq_len, num_heads * head_size] - key: shape = [batch_size, seq_len, num_kv_heads * head_size] - value: shape = [batch_size, seq_len, num_kv_heads * head_size] + query: shape = [num_tokens, num_heads * head_size] + key: shape = [num_tokens, num_kv_heads * head_size] + value: shape = [num_tokens, num_kv_heads * head_size] key_cache: shape = [num_blocks, num_kv_heads, head_size/x, block_size, x] value_cache: shape = [num_blocks, num_kv_heads, head_size, block_size] input_metadata: metadata for the inputs. Returns: - shape = [batch_size, seq_len, num_heads * head_size] + shape = [num_tokens, num_heads * head_size] """ - batch_size, seq_len, hidden_size = query.shape - # Reshape the query, key, and value tensors. + num_tokens, hidden_size = query.shape query = query.view(-1, self.num_heads, self.head_size) key = key.view(-1, self.num_kv_heads, self.head_size) value = value.view(-1, self.num_kv_heads, self.head_size) @@ -82,9 +96,10 @@ def forward( if input_metadata.is_prompt: # Prompt run. + # key_cache and value_cache are None when it is a profiling run. + # block tables are empty if the prompt has never been computed. 
if (key_cache is None or value_cache is None or input_metadata.block_tables.numel() == 0): - # normal attention if self.num_kv_heads != self.num_heads: # As of Nov 2023, xformers only supports MHA. For MQA/GQA, # project the key and value tensors to the desired number of @@ -103,61 +118,33 @@ def forward( self.num_queries_per_kv, value.shape[-1]) - # Set attention bias if not provided. This typically happens at - # the very attention layer of every iteration. - # FIXME(woosuk): This is a hack. - if input_metadata.attn_bias is None: - if self.alibi_slopes is None: - attn_bias = BlockDiagonalCausalMask.from_seqlens( - [seq_len] * batch_size) - if self.sliding_window is not None: - attn_bias = attn_bias.make_local_attention( - self.sliding_window) - input_metadata.attn_bias = attn_bias - else: - input_metadata.attn_bias = _make_alibi_bias( - self.alibi_slopes, self.num_kv_heads, batch_size, - seq_len, query.dtype) - if self.use_ref_attention: - output = _ref_masked_attention( - query, - key, - value, - self.num_heads, - self.num_kv_heads, - self.head_size, - self.scale, - ) + print("ref attention used.") + output = torch.empty_like(query) + start = 0 + for _, prompt_len in enumerate(input_metadata.prompt_lens): + end = start + prompt_len + out = _ref_masked_attention( + query[None, start:end], + key[None, start:end], + value[None, start:end], + self.num_heads, + self.num_kv_heads, + self.head_size, + self.scale, + ) + # TODO(woosuk): Unnecessary copy. Optimize. + output[start:end].copy_(out) + start += prompt_len + # Using view got RuntimeError: view size is not compatible # with input tensor's size and stride (at least one # dimension spans across two contiguous subspaces). # Use reshape instead. - return output.reshape(batch_size, seq_len, hidden_size) - - # TODO(woosuk): Too many view operations. Let's try to reduce - # them in the future for code readability. - if self.alibi_slopes is None: - query = query.unsqueeze(0) - key = key.unsqueeze(0) - value = value.unsqueeze(0) - else: - query = query.unflatten(0, (batch_size, seq_len)) - key = key.unflatten(0, (batch_size, seq_len)) - value = value.unflatten(0, (batch_size, seq_len)) - - out = xops.memory_efficient_attention_forward( - query, - key, - value, - attn_bias=input_metadata.attn_bias, - p=0.0, - scale=self.scale, - op=xops.fmha.MemoryEfficientAttentionFlashAttentionOp[0] if - (is_hip()) else None, - ) - output = out.view_as(query) + return output.reshape(num_tokens, hidden_size) + output = self._run_memory_efficient_xformer_forward( + query, key, value, input_metadata) else: # prefix-enabled attention output = PagedAttentionImpl.forward_prefix( @@ -182,41 +169,117 @@ def forward( ) # Reshape the output tensor. - return output.view(batch_size, seq_len, hidden_size) + return output.view(-1, self.num_heads * self.head_size) + + def _run_memory_efficient_xformer_forward( + self, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + input_metadata: InputMetadata, + ) -> torch.Tensor: + """Attention for 1D query of multiple prompts. Multiple prompt + tokens are flattened in to `query` input. + + Args: + output: shape = [num_prompt_tokens, num_heads, head_size] + query: shape = [num_prompt_tokens, num_heads, head_size] + key: shape = [num_prompt_tokens, num_kv_heads, head_size] + value: shape = [num_prompt_tokens, num_kv_heads, head_size] + input_metadata: metadata for paged attention. + """ + # Set attention bias if not provided. This typically happens at + # the very attention layer of every iteration. 
+ # FIXME(woosuk): This is a hack. + if input_metadata.attn_bias is None: + if self.alibi_slopes is None: + attn_bias = BlockDiagonalCausalMask.from_seqlens( + input_metadata.prompt_lens) + if self.sliding_window is not None: + attn_bias = attn_bias.make_local_attention( + self.sliding_window) + input_metadata.attn_bias = [attn_bias] + else: + input_metadata.attn_bias = _make_alibi_bias( + self.alibi_slopes, self.num_kv_heads, query.dtype, + input_metadata) + + op = xops.fmha.MemoryEfficientAttentionFlashAttentionOp[0] if ( + is_hip()) else None + # No alibi slopes. + # TODO(woosuk): Too many view operations. Let's try to reduce + # them in the future for code readability. + if self.alibi_slopes is None: + query = query.unsqueeze(0) + key = key.unsqueeze(0) + value = value.unsqueeze(0) + out = xops.memory_efficient_attention_forward( + query, + key, + value, + attn_bias=input_metadata.attn_bias[0], + p=0.0, + scale=self.scale, + op=op) + + return out.view_as(query) + + # Attention with alibi slopes. + # FIXME(woosuk): Because xformers does not support dynamic sequence + # lengths with custom attention bias, we process each prompt one by + # one. This is inefficient, especially when we have many short prompts. + output = torch.empty_like(query) + start = 0 + for i, prompt_len in enumerate(input_metadata.prompt_lens): + end = start + prompt_len + out = xops.memory_efficient_attention_forward( + query[None, start:end], + key[None, start:end], + value[None, start:end], + attn_bias=input_metadata.attn_bias[i], + p=0.0, + scale=self.scale, + op=op) + # TODO(woosuk): Unnecessary copy. Optimize. + output[start:end].copy_(out.squeeze(0)) + start += prompt_len + return output def _make_alibi_bias( alibi_slopes: torch.Tensor, num_kv_heads: int, - batch_size: int, - seq_len: int, dtype: torch.dtype, + input_metadata: InputMetadata, ) -> LowerTriangularMaskWithTensorBias: - bias = torch.arange(seq_len, dtype=dtype) - # NOTE(zhuohan): HF uses - # `bias = bias[None, :].repeat(prompt_len, 1)` - # here. We find that both biases give the same results, but - # the bias below more accurately follows the original ALiBi - # paper. - bias = bias[None, :] - bias[:, None] - - # When using custom attention bias, xformers requires the bias to - # be sliced from a tensor whose length is a multiple of 8. - padded_len = (seq_len + 7) // 8 * 8 - num_heads = alibi_slopes.shape[0] - bias = torch.empty( - batch_size, - num_heads, - seq_len, - padded_len, - device=alibi_slopes.device, - dtype=dtype, - )[:, :, :, :seq_len].copy_(bias) - bias.mul_(alibi_slopes[:, None, None]) - if num_heads != num_kv_heads: - bias = bias.unflatten(1, (num_kv_heads, num_heads // num_kv_heads)) - attn_bias = LowerTriangularMaskWithTensorBias(bias) - return attn_bias + attn_biases = [] + for prompt_len in input_metadata.prompt_lens: + bias = torch.arange(prompt_len, dtype=dtype) + # NOTE(zhuohan): HF uses + # `bias = bias[None, :].repeat(prompt_len, 1)` + # here. We find that both biases give the same results, but + # the bias below more accurately follows the original ALiBi + # paper. + # Calculate a matrix where each element represents ith element- jth + # element. 
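A concrete illustration of the relative-position matrix described in the comment above and built by the subtraction on the next added line (example values for prompt_len = 4, not part of the patch):

import torch

bias = torch.arange(4, dtype=torch.float32)
bias = bias[None, :] - bias[:, None]
# tensor([[ 0.,  1.,  2.,  3.],
#         [-1.,  0.,  1.,  2.],
#         [-2., -1.,  0.,  1.],
#         [-3., -2., -1.,  0.]])
# Each entry is (column index - row index); it is then scaled by the per-head
# ALiBi slope and masked causally.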
+ bias = bias[None, :] - bias[:, None] + + padded_len = (prompt_len + 7) // 8 * 8 + num_heads = alibi_slopes.shape[0] + bias = torch.empty( + 1, # batch size + num_heads, + prompt_len, + padded_len, + device=alibi_slopes.device, + dtype=dtype, + )[:, :, :, :prompt_len].copy_(bias) + bias.mul_(alibi_slopes[:, None, None]) + if num_heads != num_kv_heads: + bias = bias.unflatten(1, (num_kv_heads, num_heads // num_kv_heads)) + attn_biases.append(LowerTriangularMaskWithTensorBias(bias)) + + return attn_biases def _check_use_ref_attention() -> bool: @@ -239,7 +302,6 @@ def _ref_masked_attention( query = query.view(-1, num_heads, head_size) key = key.view(-1, num_kv_heads, head_size) value = value.view(-1, num_kv_heads, head_size) - seq_len, _, _ = query.shape attn_mask = torch.triu(torch.ones(seq_len, seq_len, diff --git a/vllm/model_executor/layers/attention/ops/paged_attn.py b/vllm/model_executor/layers/attention/ops/paged_attn.py index c5a9618c2395b..3105ba37b9832 100644 --- a/vllm/model_executor/layers/attention/ops/paged_attn.py +++ b/vllm/model_executor/layers/attention/ops/paged_attn.py @@ -128,11 +128,12 @@ def forward_prefix( output, key_cache, value_cache, - input_metadata.block_tables, # [BS, max_block_per_request] - input_metadata.start_loc, - input_metadata.prompt_lens, + input_metadata.block_tables, + # subquery_start_loc is (batch_size + 1,) + input_metadata.subquery_start_loc[:-1], + input_metadata.prompt_lens_tensor, input_metadata.context_lens, - input_metadata.max_seq_len, + input_metadata.max_subquery_len, alibi_slopes, ) return output diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 1fab1e734e1d7..ac8336ca0f9ad 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -128,7 +128,6 @@ def _prune_hidden_states( hidden_states: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> torch.Tensor: - hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) return hidden_states.index_select(0, sampling_metadata.selected_token_indices) diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 7e25311fa2268..cfccbbb20adc5 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -28,9 +28,12 @@ KVCache = Tuple[torch.Tensor, torch.Tensor] _PAD_SLOT_ID = -1 LORA_WARMUP_RANK = 8 -# Capture graphs for batch size 1, 2, 4, 8, 16, 24, 32, 40, ..., 256. +_BATCH_SIZE_ALIGNMENT = 8 +# Capture graphs for token size 1, 2, 4, 8, 16, 24, 32, 40, ..., 256. # NOTE: _get_graph_batch_size needs to be updated if this list is changed. 
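As the NOTE above says, the rounding helper has to stay in sync with the capture list redefined just below. A usage sketch of the alignment rule this patch introduces (the function name here is illustrative; the patch's own version is _get_graph_batch_size, shown later in this file):

_BATCH_SIZE_ALIGNMENT = 8  # mirrors the constant added below

def round_up_to_captured_size(batch_size: int) -> int:
    # Batch sizes 1 and 2 are captured as-is, then 4, then multiples of the alignment.
    if batch_size <= 2:
        return batch_size
    if batch_size <= 4:
        return 4
    return ((batch_size + _BATCH_SIZE_ALIGNMENT - 1)
            // _BATCH_SIZE_ALIGNMENT * _BATCH_SIZE_ALIGNMENT)

assert [round_up_to_captured_size(b) for b in (1, 3, 5, 9, 17, 250)] == [1, 4, 8, 16, 24, 256]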
-_BATCH_SIZES_TO_CAPTURE = [1, 2, 4] + [8 * i for i in range(1, 33)] +_BATCH_SIZES_TO_CAPTURE = [1, 2, 4] + [ + _BATCH_SIZE_ALIGNMENT * i for i in range(1, 33) +] class ModelRunner: @@ -107,8 +110,7 @@ def load_model(self) -> None: ), "Model does not have embedding_padding_modules" self.lora_manager = LRUCacheWorkerLoRAManager( self.scheduler_config.max_num_seqs, - self.scheduler_config.max_num_batched_tokens + - self.scheduler_config.max_paddings, self.vocab_size, + self.scheduler_config.max_num_batched_tokens, self.vocab_size, self.lora_config, self.device, self.model.embedding_modules, self.model.embedding_padding_modules) self.model = self.lora_manager.create_lora_manager(self.model) @@ -116,10 +118,13 @@ def load_model(self) -> None: def set_block_size(self, block_size: int) -> None: self.block_size = block_size - max_num_blocks = (self.max_context_len_to_capture + block_size - - 1) // block_size self.graph_block_tables = np.zeros( - (max(_BATCH_SIZES_TO_CAPTURE), max_num_blocks), dtype=np.int32) + (max(_BATCH_SIZES_TO_CAPTURE), self.get_max_block_per_batch()), + dtype=np.int32) + + def get_max_block_per_batch(self) -> int: + block_size = self.block_size + return (self.max_context_len_to_capture + block_size - 1) // block_size def _prepare_prompt( self, @@ -127,9 +132,9 @@ def _prepare_prompt( ) -> Tuple[torch.Tensor, torch.Tensor, InputMetadata, List[int], List[int], List[int], List[int], Set[LoRARequest]]: assert len(seq_group_metadata_list) > 0 - input_tokens: List[List[int]] = [] - input_positions: List[List[int]] = [] - slot_mapping: List[List[int]] = [] + input_tokens: List[int] = [] + input_positions: List[int] = [] + slot_mapping: List[int] = [] lora_index_mapping: List[int] = [] lora_prompt_mapping: List[int] = [] lora_requests: Set[LoRARequest] = set() @@ -158,16 +163,18 @@ def _prepare_prompt( computed_len = len(computed_block_nums) * self.block_size prompt_tokens = prompt_tokens[computed_len:] prefix_block_tables.append(computed_block_nums) + context_len = computed_len else: prefix_block_tables.append([]) + context_len = 0 # actual prompt lens - context_lens.append(computed_len) + context_lens.append(context_len) subquery_lens.append(prompt_len - computed_len) - input_tokens.append(prompt_tokens) + input_tokens.extend(prompt_tokens) # NOTE(woosuk): Here we assume that the first token in the prompt # is always the first token in the sequence. - input_positions.append( + input_positions.extend( list(range(computed_len, computed_len + len(prompt_tokens)))) lora_id = seq_group_metadata.lora_int_id @@ -175,7 +182,7 @@ def _prepare_prompt( if lora_id > 0: lora_requests.add(seq_group_metadata.lora_request) - lora_index_mapping.append([lora_id] * (prompt_len - computed_len)) + lora_index_mapping += [lora_id] * (prompt_len - computed_len) lora_prompt_mapping.extend( [lora_id] * (prompt_len - computed_len @@ -184,11 +191,10 @@ def _prepare_prompt( if seq_group_metadata.block_tables is None: # During memory profiling, the block tables are not initialized # yet. In this case, we just use a dummy slot mapping. - slot_mapping.append([_PAD_SLOT_ID] * prompt_len) + slot_mapping.extend([_PAD_SLOT_ID] * prompt_len) continue # Compute the slot mapping. - slot_mapping.append([]) block_table = seq_group_metadata.block_tables[seq_id] # Mask the [0, start_idx) tokens of the prompt with _PAD_SLOT_ID, # where start_idx is max(0, prompt_len - sliding_window). 
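A sketch of the slot arithmetic used in the hunk that follows: a token's position is mapped through the block table to a physical block, and the flat slot id is block_number * block_size + offset. The values below are made up for illustration:

block_size = 16
block_table = [7, 12, 3]   # physical block id for each logical block of the sequence
position = 21              # logical block 21 // 16 == 1, offset 21 % 16 == 5
slot = block_table[position // block_size] * block_size + position % block_size
assert slot == 12 * 16 + 5  # == 197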
@@ -203,35 +209,30 @@ def _prepare_prompt( start_idx = max(0, prompt_len - self.sliding_window) for i in range(computed_len, prompt_len): if i < start_idx: - slot_mapping[-1].append(_PAD_SLOT_ID) + slot_mapping.append(_PAD_SLOT_ID) continue block_number = block_table[i // self.block_size] block_offset = i % self.block_size slot = block_number * self.block_size + block_offset - slot_mapping[-1].append(slot) - - max_prompt_len = max(subquery_lens) - assert max_prompt_len > 0 - input_tokens = _make_tensor_with_pad(input_tokens, - max_prompt_len, - pad=0, - dtype=torch.long, - device=self.device) - input_positions = _make_tensor_with_pad(input_positions, - max_prompt_len, - pad=0, - dtype=torch.long, - device=self.device) - slot_mapping = _make_tensor_with_pad(slot_mapping, - max_prompt_len, - pad=_PAD_SLOT_ID, - dtype=torch.long, - device=self.device) - lora_index_mapping = [ - _pad_to_max(mapping, max_prompt_len, pad=0) - for mapping in lora_index_mapping - ] + slot_mapping.append(slot) + + max_subquery_len = max(subquery_lens) + max_seq_len = max(prompt_lens) + num_prompt_tokens = len(input_tokens) + assert max_subquery_len > 0 + + input_tokens = torch.tensor(input_tokens, + dtype=torch.long, + device=self.device) + input_positions = torch.tensor(input_positions, + dtype=torch.long, + device=self.device) + slot_mapping = torch.tensor(slot_mapping, + dtype=torch.long, + device=self.device) + lora_index_mapping = lora_index_mapping + context_lens_tensor = torch.tensor(context_lens, dtype=torch.int, device=self.device) @@ -244,22 +245,45 @@ def _prepare_prompt( dtype=torch.int, device=self.device, ) - start_loc_tensor = torch.arange(0, - len(prompt_lens) * max_prompt_len, - max_prompt_len, - dtype=torch.long, - device=self.device) + + # Query length can be shorter than key (i.e., prompt) when prefill + # is chunked or prefix cached. 
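The start-location tensors built just below are plain prefix sums over the per-sequence lengths. An illustrative example (not part of the patch) with lengths [4, 6]:

import torch

subquery_lens = [4, 6]
lens_t = torch.tensor(subquery_lens, dtype=torch.long)
start_loc = torch.zeros(len(subquery_lens) + 1, dtype=torch.int32)
torch.cumsum(lens_t, dim=0, dtype=start_loc.dtype, out=start_loc[1:])
# start_loc is now tensor([0, 4, 10], dtype=torch.int32), the cumulative layout
# (cu_seqlens) expected by the varlen attention kernels.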
+ subquery_lens_tensor = torch.tensor(subquery_lens, + dtype=torch.long, + device=self.device) + subquery_start_loc = torch.zeros(subquery_lens_tensor.shape[0] + 1, + dtype=torch.int32, + device=self.device) + prompt_lens_tensor = torch.tensor(prompt_lens, dtype=torch.long, device=self.device) + seq_start_loc = torch.zeros(prompt_lens_tensor.shape[0] + 1, + dtype=torch.int32, + device=self.device) + + torch.cumsum(subquery_lens_tensor, + dim=0, + dtype=subquery_start_loc.dtype, + out=subquery_start_loc[1:]) + + torch.cumsum(prompt_lens_tensor, + dim=0, + dtype=seq_start_loc.dtype, + out=seq_start_loc[1:]) input_metadata = InputMetadata( is_prompt=True, slot_mapping=slot_mapping, - prompt_lens=prompt_lens_tensor, - max_seq_len=max_prompt_len, - start_loc=start_loc_tensor, + prompt_lens=prompt_lens, + prompt_lens_tensor=prompt_lens_tensor, + num_prompt_tokens=num_prompt_tokens, + num_generation_tokens=0, + max_subquery_len=max_subquery_len, max_context_len=None, + max_seq_len=max_seq_len, + subquery_start_loc=subquery_start_loc, + seq_start_loc=seq_start_loc, context_lens=context_lens_tensor, block_tables=block_tables, use_cuda_graph=False, @@ -275,9 +299,9 @@ def _prepare_decode( ) -> Tuple[torch.Tensor, torch.Tensor, InputMetadata, List[int], List[int], Set[LoRARequest]]: assert len(seq_group_metadata_list) > 0 - input_tokens: List[List[int]] = [] - input_positions: List[List[int]] = [] - slot_mapping: List[List[int]] = [] + input_tokens: List[int] = [] + input_positions: List[int] = [] + slot_mapping: List[int] = [] context_lens: List[int] = [] block_tables: List[List[int]] = [] lora_index_mapping: List[int] = [] @@ -296,11 +320,11 @@ def _prepare_decode( for seq_id in seq_ids: seq_data = seq_group_metadata.seq_data[seq_id] generation_token = seq_data.get_last_token_id() - input_tokens.append([generation_token]) + input_tokens.append(generation_token) seq_len = seq_data.get_len() position = seq_len - 1 - input_positions.append([position]) + input_positions.append(position) context_len = seq_len if self.sliding_window is None else min( seq_len, self.sliding_window) @@ -310,8 +334,8 @@ def _prepare_decode( block_number = block_table[position // self.block_size] block_offset = position % self.block_size slot = block_number * self.block_size + block_offset - slot_mapping.append([slot]) - lora_index_mapping.append([lora_id]) + slot_mapping.append(slot) + lora_index_mapping.append(lora_id) lora_prompt_mapping.append(lora_id) if self.sliding_window is not None: @@ -320,6 +344,9 @@ def _prepare_decode( block_table = block_table[-sliding_window_blocks:] block_tables.append(block_table) + # vLLM uses cuda graph only for decoding requests. + # See `capture_model` API for more details. + # For decoding requests, batch_size == input_tokens. batch_size = len(input_tokens) max_context_len = max(context_lens) use_captured_graph = ( @@ -327,38 +354,37 @@ def _prepare_decode( and batch_size <= _BATCH_SIZES_TO_CAPTURE[-1] and max_context_len <= self.max_context_len_to_capture) if use_captured_graph: - # Pad the input tokens, positions, and slot mapping to match the - # batch size of the captured graph. 
graph_batch_size = _get_graph_batch_size(batch_size) assert graph_batch_size >= batch_size for _ in range(graph_batch_size - batch_size): - input_tokens.append([]) - input_positions.append([]) - slot_mapping.append([]) + input_tokens.append(0) + input_positions.append(0) + slot_mapping.append(_PAD_SLOT_ID) context_lens.append(1) block_tables.append([]) + lora_index_mapping.append(0) batch_size = graph_batch_size - input_tokens = _make_tensor_with_pad(input_tokens, - max_len=1, - pad=0, - dtype=torch.long, - device=self.device) - input_positions = _make_tensor_with_pad(input_positions, - max_len=1, - pad=0, - dtype=torch.long, - device=self.device) - slot_mapping = _make_tensor_with_pad(slot_mapping, - max_len=1, - pad=_PAD_SLOT_ID, - dtype=torch.long, - device=self.device) + input_tokens = torch.tensor(input_tokens, + dtype=torch.long, + device=self.device) + input_positions = torch.tensor(input_positions, + dtype=torch.long, + device=self.device) + slot_mapping = torch.tensor(slot_mapping, + dtype=torch.long, + device=self.device) context_lens = torch.tensor(context_lens, dtype=torch.int, device=self.device) if use_captured_graph: + # When using cuda-graph all these tensors should be + # padded. + assert context_lens.shape[0] == input_tokens.shape[0] + assert context_lens.shape[0] == input_positions.shape[0] + assert context_lens.shape[0] == slot_mapping.shape[0] + # The shape of graph_block_tables is # [max batch size, max context len // block size]. input_block_tables = self.graph_block_tables[:batch_size] @@ -377,17 +403,18 @@ def _prepare_decode( device=self.device, ) - lora_index_mapping = [ - _pad_to_max(mapping, 1, pad=0) for mapping in lora_index_mapping - ] - input_metadata = InputMetadata( is_prompt=False, slot_mapping=slot_mapping, prompt_lens=None, - max_seq_len=None, - start_loc=None, + prompt_lens_tensor=None, + num_prompt_tokens=0, + num_generation_tokens=len(input_tokens), + max_subquery_len=None, max_context_len=max_context_len, + max_seq_len=None, + subquery_start_loc=None, + seq_start_loc=None, context_lens=context_lens, block_tables=block_tables, use_cuda_graph=use_captured_graph, @@ -411,7 +438,6 @@ def _prepare_sample( categorized_sampled_token_indices_start_idx = 0 pin_memory = not self.in_wsl and not self.device_config.is_neuron - max_subquery_len = max(subquery_lens) if subquery_lens else 1 for i, seq_group_metadata in enumerate(seq_group_metadata_list): seq_ids = list(seq_group_metadata.seq_data.keys()) sampling_params = seq_group_metadata.sampling_params @@ -439,7 +465,7 @@ def _prepare_sample( selected_token_start_idx + subquery_len - 1)) selected_token_indices.append(selected_token_start_idx + subquery_len - 1) - selected_token_start_idx += max_subquery_len + selected_token_start_idx += subquery_len if sampling_params.seed is not None: seq_group_metadata.state.generator = torch.Generator( @@ -521,11 +547,8 @@ def prepare_input_tensors( subquery_lens) if self.lora_config: - flat_lora_index_mapping = [ - item for sublist in lora_index_mapping for item in sublist - ] lora_mapping = LoRAMapping( - flat_lora_index_mapping, + lora_index_mapping, lora_prompt_mapping, ) else: @@ -679,6 +702,18 @@ def list_loras(self) -> Set[int]: @torch.inference_mode() def capture_model(self, kv_caches: List[KVCache]) -> None: + """Cuda graph capture a model. + + Note that CUDA graph's performance gain is negligible if number + of batched tokens are larger than 200. 
And since CUDA graph + requires fixed sized tensors, supporting large/variable batch + size requires high GPU memory overhead. Thus, vLLM only captures + decoding requests. Mixed batch (chunked prefill + decoding) or + prefill requests are not captured. + + Since it is used for decoding-only, it assumes there's only 1 token + per sequence in the batch. + """ # NOTE(woosuk): This is a hack to ensure that the NCCL backend is never # deleted before the CUDA graphs. self.cupy_nccl_backend = cupy_utils.get_nccl_backend() @@ -697,10 +732,9 @@ def capture_model(self, kv_caches: List[KVCache]) -> None: # Prepare dummy inputs. These will be reused for all batch sizes. max_batch_size = max(_BATCH_SIZES_TO_CAPTURE) - input_tokens = torch.zeros(max_batch_size, 1, dtype=torch.long).cuda() - input_positions = torch.zeros(max_batch_size, 1, - dtype=torch.long).cuda() - slot_mapping = torch.empty(max_batch_size, 1, dtype=torch.long).cuda() + input_tokens = torch.zeros(max_batch_size, dtype=torch.long).cuda() + input_positions = torch.zeros(max_batch_size, dtype=torch.long).cuda() + slot_mapping = torch.empty(max_batch_size, dtype=torch.long).cuda() slot_mapping.fill_(_PAD_SLOT_ID) context_lens = torch.ones(max_batch_size, dtype=torch.int32).cuda() block_tables = torch.from_numpy(self.graph_block_tables).cuda() @@ -726,9 +760,14 @@ def capture_model(self, kv_caches: List[KVCache]) -> None: is_prompt=False, slot_mapping=slot_mapping[:batch_size], prompt_lens=None, - max_seq_len=None, - start_loc=None, + prompt_lens_tensor=None, + num_prompt_tokens=0, + num_generation_tokens=batch_size, + max_subquery_len=None, max_context_len=self.max_context_len_to_capture, + max_seq_len=None, + subquery_start_loc=None, + seq_start_loc=None, context_lens=context_lens[:batch_size], block_tables=block_tables[:batch_size], use_cuda_graph=True, @@ -845,7 +884,6 @@ def forward( non_blocking=True) self.input_buffers["block_tables"].copy_(input_metadata.block_tables, non_blocking=True) - # Run the graph. self.graph.replay() @@ -877,17 +915,28 @@ def _make_tensor_with_pad( dtype: torch.dtype, device: Optional[Union[str, torch.device]], ) -> torch.Tensor: + """Make a padded tensor of a 2D inputs. + + The padding is applied to the end of each inner list until it reaches + `max_len`. + """ padded_x = [_pad_to_max(x_i, max_len, pad) for x_i in x] return torch.tensor(padded_x, dtype=dtype, device=device) def _get_graph_batch_size(batch_size: int) -> int: + """Returns the padded batch size given actual batch size. + + Batch sizes are 1, 2, 4, _BATCH_SIZE_ALIGNMENT, + 2*_BATCH_SIZE_ALIGNMENT, 3*_BATCH_SIZE_ALIGNMENT... 
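Concretely, the rounding rule described here behaves as in the standalone sketch below; _BATCH_SIZE_ALIGNMENT is assumed to be 8, which is consistent with the `(batch_size + 7) // 8 * 8` expression being replaced just after this docstring.

_BATCH_SIZE_ALIGNMENT = 8  # assumed value, matching the old rounding


def get_graph_batch_size(batch_size: int) -> int:
    # Round up to the next captured size: 1, 2, 4, then multiples of 8.
    if batch_size <= 2:
        return batch_size
    elif batch_size <= 4:
        return 4
    return ((batch_size + _BATCH_SIZE_ALIGNMENT - 1) //
            _BATCH_SIZE_ALIGNMENT * _BATCH_SIZE_ALIGNMENT)


assert [get_graph_batch_size(b) for b in (1, 3, 5, 9, 17)] == [1, 4, 8, 16, 24]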
+ """ if batch_size <= 2: return batch_size elif batch_size <= 4: return 4 else: - return (batch_size + 7) // 8 * 8 + return ((batch_size + _BATCH_SIZE_ALIGNMENT - 1) // + _BATCH_SIZE_ALIGNMENT * _BATCH_SIZE_ALIGNMENT) def _async_h2d( From f1c0fc391909e55fce5f109893f3c483f69a091f Mon Sep 17 00:00:00 2001 From: Roy Date: Thu, 21 Mar 2024 07:25:01 +0800 Subject: [PATCH 152/196] Migrate `logits` computation and gather to `model_runner` (#3233) --- .buildkite/test-pipeline.yaml | 3 + tests/lora/conftest.py | 7 +- tests/lora/test_layers.py | 66 ++++++----- tests/samplers/test_sampler.py | 95 ++++------------ tests/test_logits_processor.py | 94 ++++++++++++++++ vllm/lora/layers.py | 20 ++-- vllm/lora/models.py | 13 ++- .../model_executor/layers/logits_processor.py | 106 ++++++++++++++++++ vllm/model_executor/layers/sampler.py | 81 +------------ vllm/model_executor/models/baichuan.py | 15 ++- vllm/model_executor/models/bloom.py | 15 ++- vllm/model_executor/models/chatglm.py | 15 ++- vllm/model_executor/models/deepseek.py | 15 ++- vllm/model_executor/models/falcon.py | 15 ++- vllm/model_executor/models/gemma.py | 15 ++- vllm/model_executor/models/gpt2.py | 14 ++- vllm/model_executor/models/gpt_bigcode.py | 15 ++- vllm/model_executor/models/gpt_j.py | 15 ++- vllm/model_executor/models/gpt_neox.py | 15 ++- vllm/model_executor/models/internlm2.py | 15 ++- vllm/model_executor/models/llama.py | 18 ++- vllm/model_executor/models/mixtral.py | 16 ++- vllm/model_executor/models/mixtral_quant.py | 15 ++- vllm/model_executor/models/mpt.py | 15 ++- vllm/model_executor/models/neuron/llama.py | 15 ++- vllm/model_executor/models/neuron/mistral.py | 15 ++- vllm/model_executor/models/olmo.py | 15 ++- vllm/model_executor/models/opt.py | 15 ++- vllm/model_executor/models/orion.py | 15 ++- vllm/model_executor/models/phi.py | 16 ++- vllm/model_executor/models/qwen.py | 15 ++- vllm/model_executor/models/qwen2.py | 24 ++-- vllm/model_executor/models/stablelm.py | 15 ++- vllm/model_executor/models/starcoder2.py | 16 ++- vllm/worker/model_runner.py | 9 +- 35 files changed, 577 insertions(+), 306 deletions(-) create mode 100644 tests/test_logits_processor.py create mode 100644 vllm/model_executor/layers/logits_processor.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 17f4c33670821..6d052d0f7f4a4 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -49,6 +49,9 @@ steps: - label: Samplers Test command: pytest -v -s samplers +- label: LogitsProcessor Test + command: pytest -v -s test_logits_processor.py + - label: Worker Test command: pytest -v -s worker diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py index 30a8ad03c8ada..38560c251696a 100644 --- a/tests/lora/conftest.py +++ b/tests/lora/conftest.py @@ -13,6 +13,7 @@ import vllm from vllm.config import LoRAConfig from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.model_loader import get_model from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, @@ -85,7 +86,8 @@ def dummy_model() -> nn.Module: ("outact", nn.Sigmoid()), # Special handling for lm_head & sampler ("lm_head", ParallelLMHead(512, 10)), - ("sampler", Sampler(512)) + ("logits_processor", LogitsProcessor(512)), + ("sampler", Sampler()) ])) model.config = MagicMock() return model @@ -110,7 +112,8 @@ def dummy_model_gate_up() -> nn.Module: ("outact", nn.Sigmoid()), # Special handling for lm_head & 
sampler ("lm_head", ParallelLMHead(512, 10)), - ("sampler", Sampler(512)) + ("logits_processor", LogitsProcessor(512)), + ("sampler", Sampler()) ])) model.config = MagicMock() return model diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 46f054c5b84ef..7dfc3952016f5 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -13,14 +13,14 @@ QKVParallelLinearWithLora, VocabParallelEmbeddingWithLoRA, RowParallelLinearWithLoRA, - SamplerWithLoRA, + LogitsProcessorWithLoRA, LoRAMapping, BaseLayerWithLoRA, ) from vllm.lora.models import (LoRALayerWeights, convert_mapping, PackedLoRALayerWeights) from vllm.config import LoRAConfig -from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, RowParallelLinear, @@ -394,7 +394,7 @@ def create_random_embedding_layer(): @torch.inference_mode() @pytest.mark.parametrize("num_loras", [1, 2, 4, 8]) @pytest.mark.parametrize("device", CUDA_DEVICES) -def test_lm_head_sampler(dist_init, num_loras, device) -> None: +def test_lm_head_logits_processor(dist_init, num_loras, device) -> None: torch.set_default_device(device) max_loras = 8 @@ -402,28 +402,29 @@ def test_lm_head_sampler(dist_init, num_loras, device) -> None: max_lora_rank=8, lora_dtype=torch.float16) - def create_random_sampler_layer(): + def _pretest(): linear = ParallelLMHead(32000 + lora_config.lora_extra_vocab_size, 1024, 32000) linear.weight.data = torch.rand_like(linear.weight.data) linear.weight.data[:, 32000:] = 0 - sampler = Sampler(32000 + lora_config.lora_extra_vocab_size, 32000) - lora_sampler = SamplerWithLoRA(sampler, 1024, linear.weight.dtype, - linear.weight.device) - lora_sampler.create_lora_weights(max_loras, lora_config) + logits_processor = LogitsProcessor( + 32000 + lora_config.lora_extra_vocab_size, 32000) + lora_logits_processor = LogitsProcessorWithLoRA( + logits_processor, 1024, linear.weight.dtype, linear.weight.device) + lora_logits_processor.create_lora_weights(max_loras, lora_config) - return linear, sampler, lora_sampler + return linear, logits_processor, lora_logits_processor for i in range(10): set_random_seed(i) id_to_index = get_random_id_to_index(num_loras, max_loras) - linear, sampler, lora_sampler = create_random_sampler_layer() + linear, logits_processor, lora_logits_processor = _pretest() # NOTE: all the generated loras share the same embeddings tensor. lora_dict, _ = populate_loras( id_to_index, - layer=lora_sampler, + layer=lora_logits_processor, layer_weights=linear.weight, generate_embeddings_tensor=1024, ) @@ -447,34 +448,37 @@ def create_random_sampler_layer(): 32000, lora_config.lora_extra_vocab_size, ) - lora_sampler.set_mapping(*mapping_info, ) + lora_logits_processor.set_mapping(*mapping_info, ) - lora_result = lora_sampler._get_logits(hidden_states=torch.cat(inputs), - embedding=linear.weight, - embedding_bias=None) + lora_result = lora_logits_processor._get_logits( + hidden_states=torch.cat(inputs), + embedding=linear.weight, + embedding_bias=None) original_weight = linear.weight.clone() - linear.weight[sampler.org_vocab_size:sampler.org_vocab_size + + linear.weight[logits_processor. 
+ org_vocab_size:logits_processor.org_vocab_size + embeddings_tensor_len] = embeddings_tensor - sampler.org_vocab_size = 32000 + lora_config.lora_extra_vocab_size + logits_processor.org_vocab_size = (32000 + + lora_config.lora_extra_vocab_size) expected_results = [] for input_, lora_id in zip(inputs, prompt_mapping): lora = lora_dict[lora_id] - result = sampler._get_logits(hidden_states=input_, - embedding=linear.weight, - embedding_bias=None) + result = logits_processor._get_logits(hidden_states=input_, + embedding=linear.weight, + embedding_bias=None) result[:, 32000 + embeddings_tensor_len:] = float("-inf") result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling expected_results.append(result) expected_result = torch.cat(expected_results) - sampler.org_vocab_size = 32000 + logits_processor.org_vocab_size = 32000 # Check that resetting the lora weights succeeds for slot_idx in range(max_loras): - lora_sampler.reset_lora(slot_idx) + lora_logits_processor.reset_lora(slot_idx) inputs, index_mapping, prompt_mapping = create_random_inputs( active_lora_ids=[0], @@ -488,14 +492,16 @@ def create_random_sampler_layer(): mapping_info = convert_mapping(lora_mapping, id_to_index, max_loras, 32000, lora_config.lora_extra_vocab_size) - lora_sampler.set_mapping(*mapping_info, ) - - lora_result = lora_sampler._get_logits(hidden_states=torch.cat(inputs), - embedding=original_weight, - embedding_bias=None)[:, :32000] - expected_result = sampler._get_logits(hidden_states=torch.cat(inputs), - embedding=original_weight, - embedding_bias=None) + lora_logits_processor.set_mapping(*mapping_info, ) + + lora_result = lora_logits_processor._get_logits( + hidden_states=torch.cat(inputs), + embedding=original_weight, + embedding_bias=None)[:, :32000] + expected_result = logits_processor._get_logits( + hidden_states=torch.cat(inputs), + embedding=original_weight, + embedding_bias=None) rtol, atol = TOLERANCES[lora_result.dtype] assert torch.allclose(lora_result, diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index b0c6e1c09eebc..92aec831d02e2 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -15,17 +15,12 @@ class MockLogitsSampler(Sampler): - def __init__(self, vocab_size: int, fake_logits: torch.Tensor): - super().__init__(vocab_size=vocab_size) + def __init__(self, fake_logits: torch.Tensor): + super().__init__() self.fake_logits = fake_logits def forward(self, *args, **kwargs): - with patch( - "vllm.model_executor.layers.sampler._prune_hidden_states", - lambda x, y: x), patch( - "vllm.model_executor.layers.sampler.Sampler._get_logits", - lambda *args, **kwargs: self.fake_logits): - return super().forward(*args, **kwargs) + return super().forward(*args, **kwargs) def _prepare_test( @@ -36,7 +31,7 @@ def _prepare_test( fake_logits = torch.full((batch_size, vocab_size), 1e-2, dtype=input_tensor.dtype) - sampler = MockLogitsSampler(32000, fake_logits) + sampler = MockLogitsSampler(fake_logits) model_runner = ModelRunner(None, None, None, None, None) return input_tensor, fake_logits, sampler, model_runner @@ -70,9 +65,7 @@ def _do_sample( sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list, prompt_lens, subquery_lens=prompt_lens) - return sampler(embedding=None, - hidden_states=input_tensor, - sampling_metadata=sampling_metadata) + return sampler(logits=input_tensor, sampling_metadata=sampling_metadata) @pytest.mark.parametrize("seed", RANDOM_SEEDS) @@ -85,8 +78,8 @@ def test_sampler_all_greedy(seed: int, device: str): batch_size) 
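The expected value computed in the test above reduces to a plain low-rank update of the lm_head logits. The sketch below reproduces that arithmetic on its own; the shapes (hidden size 1024, rank 8, vocab 32000) are made up for illustration and only torch is assumed.

import torch

torch.manual_seed(0)
hidden = torch.randn(4, 1024)         # pruned hidden states for 4 tokens
lm_head_w = torch.randn(32000, 1024)  # base lm_head weight (vocab x hidden)
lora_a = torch.randn(1024, 8)         # rank-8 adapter, A projects down
lora_b = torch.randn(8, 32000)        # B projects back up to the vocab
scaling = 0.5

base_logits = hidden @ lm_head_w.t()
lora_logits = base_logits + (hidden @ lora_a @ lora_b) * scaling
print(lora_logits.shape)  # torch.Size([4, 32000])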
sampling_params = SamplingParams(temperature=0) - sampler_output = _do_sample(batch_size, input_tensor, sampler, - model_runner, sampling_params) + sampler_output = _do_sample(batch_size, fake_logits, sampler, model_runner, + sampling_params) expected = torch.argmax(fake_logits, dim=-1) for i, sequence_output in enumerate(sampler_output): for nth_output in sequence_output.samples: @@ -111,8 +104,8 @@ def test_sampler_all_random(seed: int, device: str): temperature=1.0, n=random.randint(1, 10), ) - sampler_output = _do_sample(batch_size, input_tensor, sampler, - model_runner, sampling_params) + sampler_output = _do_sample(batch_size, fake_logits, sampler, model_runner, + sampling_params) for i, sequence_output in enumerate(sampler_output): for nth_output in sequence_output.samples: @@ -127,8 +120,7 @@ def test_sampler_all_random_seed(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) - input_tensor, fake_logits, sampler, model_runner = _prepare_test( - batch_size) + _, fake_logits, sampler, model_runner = _prepare_test(batch_size) for i in range(batch_size): fake_logits[i, i] = 1e2 @@ -138,8 +130,8 @@ def test_sampler_all_random_seed(seed: int, device: str): n=random.randint(1, 10), seed=random.randint(0, 10000), ) - sampler_output = _do_sample(batch_size, input_tensor, sampler, - model_runner, sampling_params) + sampler_output = _do_sample(batch_size, fake_logits, sampler, model_runner, + sampling_params) for i, sequence_output in enumerate(sampler_output): for nth_output in sequence_output.samples: @@ -154,18 +146,17 @@ def test_sampler_all_random_seed_deterministic(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) - input_tensor, fake_logits, sampler, model_runner = _prepare_test( - batch_size) + _, fake_logits, sampler, model_runner = _prepare_test(batch_size) sampling_params = SamplingParams( temperature=1.0, n=random.randint(1, 10), seed=random.randint(0, 10000), ) - first_sampler_output = _do_sample(batch_size, input_tensor, sampler, + first_sampler_output = _do_sample(batch_size, fake_logits, sampler, model_runner, sampling_params) - second_sampler_output = _do_sample(batch_size, input_tensor, sampler, + second_sampler_output = _do_sample(batch_size, fake_logits, sampler, model_runner, sampling_params) assert first_sampler_output == second_sampler_output @@ -179,15 +170,14 @@ def test_sampler_all_beam(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) - input_tensor, _, sampler, model_runner = _prepare_test(batch_size) + _, fake_logits, sampler, model_runner = _prepare_test(batch_size) sampling_params = SamplingParams( temperature=0, best_of=2, use_beam_search=True, ) - _do_sample(batch_size, input_tensor, sampler, model_runner, - sampling_params) + _do_sample(batch_size, fake_logits, sampler, model_runner, sampling_params) # no assertion here as I am not sure how to determine whether # the outputs are expected - in other words, this just tests # whether there are no exceptions in the sampler @@ -246,8 +236,7 @@ def test_sampler_mixed(seed: int, device: str): def test_sampling(model_runner: ModelRunner): sampling_metadata = model_runner._prepare_sample( seq_group_metadata_list, prompt_lens, subquery_lens=prompt_lens) - sampler_output = sampler(embedding=None, - hidden_states=input_tensor, + sampler_output = sampler(logits=fake_logits, sampling_metadata=sampling_metadata) for i, 
(sequence_output, metadata) in enumerate( @@ -294,48 +283,6 @@ def test_sampling(model_runner: ModelRunner): del model_runner -@pytest.mark.parametrize("seed", RANDOM_SEEDS) -@pytest.mark.parametrize("device", CUDA_DEVICES) -def test_sampler_logits_processors(seed: int, device: str): - set_random_seed(seed) - torch.set_default_device(device) - batch_size = random.randint(1, 256) - input_tensor, _, sampler, model_runner = _prepare_test(batch_size) - - # This sample logits processor gives maximum score to the i-th token, - # where i is the length of the input sequence. - # We therefore expect the output token sequence to be [0, 1, 2, ...] - def pick_ith(token_ids, logits): - logits[len(token_ids)] = torch.finfo(logits.dtype).max - return logits - - seq_group_metadata_list = [] - prompt_lens = [] - for i in range(batch_size): - seq_group_metadata_list.append( - SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=True, - seq_data={0: SequenceData([1, 2, 3])}, - sampling_params=SamplingParams(temperature=0, - logits_processors=[pick_ith]), - block_tables={0: [1]}, - )) - prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len()) - - sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list, - prompt_lens, - subquery_lens=prompt_lens) - sampler_output = sampler(embedding=None, - hidden_states=input_tensor, - sampling_metadata=sampling_metadata) - for _, sequence_output in enumerate(sampler_output): - for idx, nth_output in enumerate(sequence_output.samples): - assert nth_output.output_token == idx - - del model_runner - - @pytest.mark.parametrize("seed", RANDOM_SEEDS) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_sampler_top_k_top_p(seed: int, device: str): @@ -352,7 +299,7 @@ def test_sampler_top_k_top_p(seed: int, device: str): size=(batch_size, vocab_size), device=input_tensor.device, dtype=input_tensor.dtype) - sampler = MockLogitsSampler(32000, fake_logits) + sampler = MockLogitsSampler(fake_logits) model_runner = ModelRunner(None, None, None, None, None) generation_model = GenerationMixin() @@ -391,9 +338,7 @@ def mock_sample(probs, *args, **kwargs): return [[prob.topk(1, dim=-1).indices.tolist(), [0]] for prob in probs] with patch("vllm.model_executor.layers.sampler._sample", mock_sample): - sampler(embedding=None, - hidden_states=input_tensor, - sampling_metadata=sampling_metadata) + sampler(logits=fake_logits, sampling_metadata=sampling_metadata) hf_probs = warpers(torch.zeros_like(fake_logits), fake_logits.clone()) hf_probs = torch.softmax(hf_probs, dim=-1, dtype=torch.float) assert torch.allclose(hf_probs, sample_probs, atol=1e-5) diff --git a/tests/test_logits_processor.py b/tests/test_logits_processor.py new file mode 100644 index 0000000000000..fe321520114f7 --- /dev/null +++ b/tests/test_logits_processor.py @@ -0,0 +1,94 @@ +import random +from typing import Tuple +from unittest.mock import patch + +import pytest +import torch + +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.utils import set_random_seed +from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata +from vllm.worker.model_runner import ModelRunner + + +class MockLogitsProcessor(LogitsProcessor): + + def __init__(self, vocab_size: int, scale: float, + fake_logits: torch.Tensor): + super().__init__(vocab_size=vocab_size, scale=scale) + self.fake_logits = fake_logits.clone() + + def forward(self, *args, **kwargs): + with patch( + "vllm.model_executor.layers.logits_processor._prune_hidden_states", + 
lambda x, y: x + ), patch( + "vllm.model_executor.layers.logits_processor.LogitsProcessor._get_logits", + lambda *args, **kwargs: self.fake_logits): + return super().forward(*args, **kwargs) + + +def _prepare_test( + batch_size: int +) -> Tuple[torch.Tensor, torch.Tensor, MockLogitsProcessor, ModelRunner]: + vocab_size = 32000 + input_tensor = torch.rand((batch_size, 1024), dtype=torch.float16) + fake_logits = torch.full((batch_size, vocab_size), + 1e-2, + dtype=input_tensor.dtype) + logits_processor = MockLogitsProcessor(32000, 0.5, fake_logits) + model_runner = ModelRunner(None, None, None, None, None) + return input_tensor, fake_logits, logits_processor, model_runner + + +RANDOM_SEEDS = list(range(128)) +CUDA_DEVICES = [ + f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) +] + + +@pytest.mark.parametrize("seed", RANDOM_SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +def test_logits_processors(seed: int, device: str): + set_random_seed(seed) + torch.set_default_device(device) + batch_size = random.randint(1, 256) + input_tensor, fake_logits, logits_processor, model_runner = _prepare_test( + batch_size) + + # This sample logits processor gives infinite score to the i-th token, + # where i is the length of the input sequence. + # We therefore expect the output token sequence to be [0, 1, 2, ...] + def pick_ith(token_ids, logits): + logits[len(token_ids)] = float("inf") + return logits + + seq_group_metadata_list = [] + prompt_lens = [] + for i in range(batch_size): + seq_group_metadata_list.append( + SequenceGroupMetadata( + request_id=f"test_{i}", + is_prompt=True, + seq_data={0: SequenceData([1, 2, 3])}, + sampling_params=SamplingParams(temperature=0, + logits_processors=[pick_ith]), + block_tables={0: [1]}, + )) + prompt_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len()) + + sampling_metadata = model_runner._prepare_sample(seq_group_metadata_list, + prompt_lens, + subquery_lens=prompt_lens) + logits_processor_output = logits_processor( + embedding=None, + hidden_states=input_tensor, + sampling_metadata=sampling_metadata) + + assert torch.isinf(logits_processor_output[:, 0]).all() + + fake_logits *= logits_processor.scale + assert torch.allclose(logits_processor_output[:, 1], fake_logits[:, 1], + 1e-4) + + del model_runner diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py index 99e6cdeee6364..f6cd1390d4bce 100644 --- a/vllm/lora/layers.py +++ b/vllm/lora/layers.py @@ -10,7 +10,6 @@ from vllm.config import LoRAConfig from vllm.lora.punica import add_lora, add_lora_slice, bgmv -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.parallel_utils.communication_op import ( tensor_model_parallel_all_gather, tensor_model_parallel_all_reduce, @@ -20,6 +19,7 @@ RowParallelLinear, QKVParallelLinear, MergedColumnParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) from vllm.model_executor.parallel_utils.parallel_state import ( @@ -783,11 +783,11 @@ def weight(self): return self.base_layer.weight -class SamplerWithLoRA(BaseLayerWithLoRA): +class LogitsProcessorWithLoRA(BaseLayerWithLoRA): def __init__( self, - base_layer: Sampler, + base_layer: LogitsProcessor, hidden_size: int, dtype: torch.dtype, device: torch.device, @@ -806,6 +806,10 @@ def logits_as_hidden_states(self): def vocab_size(self): return self.base_layer.vocab_size + @property + def scale(self): + return 
self.base_layer.scale + @property def org_vocab_size(self): return self.base_layer.org_vocab_size @@ -968,14 +972,14 @@ def from_layer( return layer -def from_layer_sampler( - layer: Sampler, +def from_layer_logits_processor( + layer: LogitsProcessor, lm_head: ParallelLMHead, max_loras: int, lora_config: LoRAConfig, model_config: Optional[PretrainedConfig] = None, -) -> SamplerWithLoRA: - ret = SamplerWithLoRA(layer, lm_head.embedding_dim, lm_head.weight.dtype, - lm_head.weight.device) +) -> LogitsProcessorWithLoRA: + ret = LogitsProcessorWithLoRA(layer, lm_head.embedding_dim, + lm_head.weight.dtype, lm_head.weight.device) ret.create_lora_weights(max_loras, lora_config, model_config) return ret diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 6fe07b69b3203..d1bac7617e1d4 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -14,7 +14,7 @@ from vllm.utils import LRUCache, in_wsl from vllm.lora.layers import (BaseLayerWithLoRA, LoRAMapping, from_layer, - from_layer_sampler) + from_layer_logits_processor) from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.utils import parse_fine_tuned_lora_name, replace_submodule @@ -421,11 +421,14 @@ def _create_lora_modules(self): self.model.config)) # (yard1): TODO make this more robust if "lm_head" in module_name: - sampler_module = self.model.get_submodule("sampler") + logits_processor_module = self.model.get_submodule( + "logits_processor") new_module = replace_submodule( - self.model, "sampler", - from_layer_sampler(sampler_module, module, self.lora_slots, - self.lora_config, self.model.config)) + self.model, "logits_processor", + from_layer_logits_processor(logits_processor_module, + module, self.lora_slots, + self.lora_config, + self.model.config)) self.register_module(module_name, new_module) self._register_packed_modules(module_name) new_module.set_mapping(self.base_indices, self.sampler_indices, diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py new file mode 100644 index 0000000000000..baa113c342c28 --- /dev/null +++ b/vllm/model_executor/layers/logits_processor.py @@ -0,0 +1,106 @@ +"""A layer that compute logits from hidden_stats.""" +from typing import Optional + +import torch +import torch.nn as nn + +from vllm.utils import is_neuron + +from vllm.model_executor.parallel_utils.communication_op import ( + tensor_model_parallel_gather) +from vllm.model_executor.sampling_metadata import SamplingMetadata + + +class LogitsProcessor(nn.Module): + """Process logits and apply logits processors from sampling metadata. + + This layer does the following: + 1. Gather logits from model hidden_states. + 2. Scale logits if needed. + 3. Apply logits processors (if any). + """ + + def __init__(self, + vocab_size: int, + org_vocab_size: Optional[int] = None, + scale: Optional[float] = 1.0) -> None: + """ + Args: + scale: A scaling factor to apply to the logits. + """ + super().__init__() + self.scale = scale + self.vocab_size = vocab_size + # Transformers-neuronx generate outputs as logits directly. + self.logits_as_hidden_states = is_neuron() + # original vocabulary size (without LoRA). 
+ self.org_vocab_size = org_vocab_size or vocab_size + + def forward( + self, + embedding: torch.Tensor, + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, + embedding_bias: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + if self.logits_as_hidden_states: + logits = hidden_states + else: + hidden_states = _prune_hidden_states(hidden_states, + sampling_metadata) + + # Get the logits for the next tokens. + logits = self._get_logits(hidden_states, embedding, embedding_bias) + + if logits is not None: + logits *= self.scale + + # Apply logits processors (if any). + logits = _apply_logits_processors(logits, sampling_metadata) + + return logits + + def _get_logits(self, hidden_states: torch.Tensor, embedding: torch.Tensor, + embedding_bias: Optional[torch.Tensor]) -> torch.Tensor: + # Get the logits for the next tokens. + logits = torch.matmul(hidden_states, embedding.t()) + if embedding_bias is not None: + logits += embedding_bias + logits = tensor_model_parallel_gather(logits) + # Remove paddings in vocab (if any). + if logits is not None: + logits = logits[:, :self.org_vocab_size] + return logits + + +def _prune_hidden_states( + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, +) -> torch.Tensor: + hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) + return hidden_states.index_select(0, + sampling_metadata.selected_token_indices) + + +def _apply_logits_processors( + logits: torch.Tensor, + sampling_metadata: SamplingMetadata, +) -> torch.Tensor: + logits_row_idx = 0 + found_logits_processors = False + for seq_ids, sampling_params in sampling_metadata.seq_groups: + logits_processors = sampling_params.logits_processors + if logits_processors: + found_logits_processors = True + for seq_id in seq_ids: + logits_row = logits[logits_row_idx] + token_ids = sampling_metadata.seq_data[seq_id].output_token_ids + for logits_processor in logits_processors: + logits_row = logits_processor(token_ids, logits_row) + logits[logits_row_idx] = logits_row + logits_row_idx += 1 + else: + logits_row_idx += len(seq_ids) + if found_logits_processors: + assert logits_row_idx == logits.shape[0] + return logits diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index ac8336ca0f9ad..63e494586efb5 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -4,8 +4,6 @@ import torch import torch.nn as nn -from vllm.model_executor.parallel_utils.communication_op import ( - tensor_model_parallel_gather) from vllm.model_executor.sampling_metadata import (SamplingMetadata, SamplingTensors) from vllm.sampling_params import SamplingParams, SamplingType @@ -13,7 +11,6 @@ SamplerOutput, SequenceData, SequenceGroupOutput, SequenceOutput) from vllm.model_executor.layers.ops.sample import (sample as sample_triton) -from vllm.utils import is_neuron class Sampler(nn.Module): @@ -31,58 +28,14 @@ class Sampler(nn.Module): parameters (e.g., sampling method, temperature, top-p, top-k, etc.). """ - def __init__(self, - vocab_size: int, - org_vocab_size: Optional[int] = None) -> None: - super().__init__() - self.vocab_size = vocab_size - # Transformers-neuronx generate outputs as logits directly. - self.logits_as_hidden_states = is_neuron() - # original vocabulary size (without LoRA). 
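To make the contract concrete: each user-supplied processor is a callable taking (token_ids, logits) and returning logits, applied row by row by _apply_logits_processors above after the optional scale. A minimal standalone sketch, assuming only torch and reusing the pick_ith idea from the new test:

import torch


def pick_ith(token_ids, logits):
    # Give the token whose id equals the current output length the top score.
    logits[len(token_ids)] = float("inf")
    return logits


scale = 0.5                                 # as in LogitsProcessor(..., scale=0.5)
logits = torch.full((32000,), 1e-2)
logits = logits * scale                     # step 2: scale the gathered logits
logits = pick_ith([101, 102, 103], logits)  # step 3: apply processors
assert int(logits.argmax()) == 3

With this layer in place, the per-model changes that follow all take the same shape: each model gains a compute_logits(hidden_states, sampling_metadata) method that delegates to its LogitsProcessor, and sample() now receives those logits instead of hidden states.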
- self.org_vocab_size = org_vocab_size or vocab_size - - def _get_logits(self, hidden_states: torch.Tensor, embedding: torch.Tensor, - embedding_bias: Optional[torch.Tensor]) -> torch.Tensor: - # Get the logits for the next tokens. - logits = torch.matmul(hidden_states, embedding.t()) - if embedding_bias is not None: - logits += embedding_bias - logits = tensor_model_parallel_gather(logits) - # Remove paddings in vocab (if any). - if logits is not None: - logits = logits[:, :self.org_vocab_size] - return logits - def forward( self, - embedding: torch.Tensor, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, - embedding_bias: Optional[torch.Tensor] = None, ) -> Optional[SamplerOutput]: - # Get the hidden states that we use for sampling. - if self.logits_as_hidden_states: - logits = hidden_states - else: - hidden_states = _prune_hidden_states(hidden_states, - sampling_metadata) - - # Get the logits for the next tokens. - logits = self._get_logits(hidden_states, embedding, embedding_bias) - - # Only perform sampling in the driver worker. - # Note: `_get_logits` is still distributed across TP workers because - # the `embedding` weight is distributed across TP workers. - # TODO(zhuohan): Change the get_logits part to a separate stage. - if not sampling_metadata.perform_sampling: - return None - assert logits is not None _, vocab_size = logits.shape - # Apply logits processors (if any). - logits = _apply_logits_processors(logits, sampling_metadata) - # Prepare sampling tensors with pinned memory to avoid blocking. (sampling_tensors, do_penalties, do_top_p_top_k, do_min_p) = SamplingTensors.from_sampling_metadata( @@ -124,14 +77,6 @@ def forward( prompt_logprobs, sample_logprobs) -def _prune_hidden_states( - hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, -) -> torch.Tensor: - return hidden_states.index_select(0, - sampling_metadata.selected_token_indices) - - def _get_bin_counts_and_mask( tokens: torch.Tensor, vocab_size: int, @@ -149,30 +94,6 @@ def _get_bin_counts_and_mask( return bin_counts, mask -def _apply_logits_processors( - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, -) -> torch.Tensor: - logits_row_idx = 0 - found_logits_processors = False - for seq_ids, sampling_params in sampling_metadata.seq_groups: - logits_processors = sampling_params.logits_processors - if logits_processors: - found_logits_processors = True - for seq_id in seq_ids: - logits_row = logits[logits_row_idx] - token_ids = sampling_metadata.seq_data[seq_id].output_token_ids - for logits_processor in logits_processors: - logits_row = logits_processor(token_ids, logits_row) - logits[logits_row_idx] = logits_row - logits_row_idx += 1 - else: - logits_row_idx += len(seq_ids) - if found_logits_processors: - assert logits_row_idx == logits.shape[0] - return logits - - def _apply_penalties(logits: torch.Tensor, prompt_tokens_tensor: torch.Tensor, output_tokens_tensor: torch.Tensor, presence_penalties: torch.Tensor, diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index cbf472750e294..968b9ebba87b2 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -34,6 +34,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( 
VocabParallelEmbedding, ParallelLMHead) @@ -295,7 +296,8 @@ def __init__(self, self.linear_method = linear_method self.model = BaiChuanModel(config, position_embedding, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -308,13 +310,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 0548b2b140b1b..851c475206661 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -30,6 +30,7 @@ LinearMethodBase, QKVParallelLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -273,7 +274,8 @@ def __init__( self.linear_method = linear_method self.transformer = BloomModel(config, linear_method) self.lm_head_weight = self.transformer.word_embeddings.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -286,13 +288,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 1c5dcfacaff2b..15e7de03b61f1 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -17,6 +17,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -332,7 +333,8 @@ def __init__( self.linear_method = linear_method self.transformer = ChatGLMModel(config, linear_method) self.lm_head_weight = self.transformer.output_layer.weight - self.sampler = Sampler(config.padded_vocab_size) + self.logits_processor = LogitsProcessor(config.padded_vocab_size) + self.sampler = Sampler() def forward( self, @@ -345,13 +347,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: 
SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 13c080cb02774..eff93e706f5dc 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -38,6 +38,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -372,7 +373,8 @@ def __init__( self.linear_method = linear_method self.model = DeepseekModel(config, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -385,13 +387,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: Optional[torch.Tensor], + logits: Optional[torch.Tensor], sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 3c148be5b10f4..7626dbe62293f 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -34,6 +34,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -373,7 +374,8 @@ def __init__( config.vocab_size, config.hidden_size, ) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -390,13 +392,18 @@ def forward( ) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 386a36cf492d6..fd3dbe798cd8e 100644 --- 
a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -30,6 +30,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -281,7 +282,8 @@ def __init__( self.config = config self.linear_method = linear_method self.model = GemmaModel(config, linear_method) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() @torch.no_grad() def forward( @@ -295,13 +297,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.model.embed_tokens.weight, + hidden_states, sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.model.embed_tokens.weight, - hidden_states, sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index 3f7b21e5a4133..263727cac19ff 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -30,6 +30,7 @@ LinearMethodBase, QKVParallelLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -216,7 +217,8 @@ def __init__( self.linear_method = linear_method self.transformer = GPT2Model(config, linear_method) self.lm_head_weight = self.transformer.wte.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -229,12 +231,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, + next_tokens = self.sampler(self.lm_head_weight, logits, sampling_metadata) return next_tokens diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index 5c30d47d93e36..65caabae60daa 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -31,6 +31,7 @@ LinearMethodBase, QKVParallelLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -237,7 +238,8 @@ def __init__( self.linear_method = linear_method self.transformer = GPTBigCodeModel(config, linear_method) self.lm_head_weight = self.transformer.wte.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = 
LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -250,13 +252,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index 93dce7b67a7a5..c956a12f3e46e 100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -30,6 +30,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -224,7 +225,8 @@ def __init__( config.n_embd, bias=True, ) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -237,13 +239,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata, self.lm_head.bias) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata, self.lm_head.bias) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py index 98107350e60b9..db2173936e7d9 100644 --- a/vllm/model_executor/models/gpt_neox.py +++ b/vllm/model_executor/models/gpt_neox.py @@ -30,6 +30,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -238,7 +239,8 @@ def __init__( config.vocab_size, config.hidden_size, ) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -251,13 +253,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.embed_out.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.embed_out.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def 
load_weights(self, diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index 7b2215ef4bda5..93026fc01f0f0 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -14,6 +14,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -250,7 +251,8 @@ def __init__( self.linear_method = linear_method self.model = InternLM2Model(config, linear_method) self.output = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -263,13 +265,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.output.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.output.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 4c163dfdab537..757b75129845c 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -37,6 +37,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) @@ -325,7 +326,11 @@ def __init__( # compatibility if not lora_config else lora_config.lora_vocab_padding_size, ) - self.sampler = Sampler(self.unpadded_vocab_size, config.vocab_size) + + logit_scale = getattr(config, "logit_scale", 1.0) + self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, + config.vocab_size, logit_scale) + self.sampler = Sampler() def forward( self, @@ -338,13 +343,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index d47834e519697..68a3a298444ae 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -37,6 +37,7 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import 
LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) @@ -369,7 +370,9 @@ def __init__( # compatibility if not lora_config else lora_config.lora_vocab_padding_size, ) - self.sampler = Sampler(self.unpadded_vocab_size, config.vocab_size) + self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, + config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -382,13 +385,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: Optional[torch.Tensor], + logits: Optional[torch.Tensor], sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/mixtral_quant.py b/vllm/model_executor/models/mixtral_quant.py index 25c7f1978c0dc..b4dfc439d50e9 100644 --- a/vllm/model_executor/models/mixtral_quant.py +++ b/vllm/model_executor/models/mixtral_quant.py @@ -39,6 +39,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -344,7 +345,8 @@ def __init__( self.linear_method = linear_method self.model = MixtralModel(config, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -357,13 +359,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: Optional[torch.Tensor], + logits: Optional[torch.Tensor], sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index 16ecac3d0529a..7a2568817858c 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -13,6 +13,7 @@ LinearMethodBase, QKVParallelLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -259,7 +260,8 @@ def __init__( self.transformer = MPTModel(config, linear_method) self.lm_head_weight = self.transformer.wte.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -272,13 +274,18 @@ def forward( 
input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/neuron/llama.py b/vllm/model_executor/models/neuron/llama.py index e2856da99d9b1..32c43c4944fac 100644 --- a/vllm/model_executor/models/neuron/llama.py +++ b/vllm/model_executor/models/neuron/llama.py @@ -7,6 +7,7 @@ from transformers import LlamaConfig from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import SamplerOutput @@ -25,7 +26,8 @@ def __init__( self.config = config self.linear_method = linear_method self.model = None - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -45,13 +47,18 @@ def forward( start_ids=seq_ids.flatten()) return logits + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.model.chkpt_model.lm_head, + hidden_states, sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/neuron/mistral.py b/vllm/model_executor/models/neuron/mistral.py index a302cce30abab..24fc0fa0aacab 100755 --- a/vllm/model_executor/models/neuron/mistral.py +++ b/vllm/model_executor/models/neuron/mistral.py @@ -6,6 +6,7 @@ from transformers import MistralConfig from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import SamplerOutput @@ -26,7 +27,8 @@ def __init__( self.linear_method = linear_method self.model = None self.lm_head = None - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -48,13 +50,18 @@ def forward( start_ids=seq_ids) return logits + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.model.chkpt_model.lm_head, + hidden_states, sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def 
load_weights(self, diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 2b0a420e82faf..19f2be6da8ed3 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -51,6 +51,7 @@ RowParallelLinear, ) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -336,7 +337,8 @@ def __init__(self, self.lm_head_weight = (self.model.transformer.wte.weight if config.weight_tying else self.model.transformer.ff_out.weight) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -353,13 +355,18 @@ def forward( ) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights( diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 782f43ce265bd..a12f63b58f52b 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -31,6 +31,7 @@ QKVParallelLinear, ReplicatedLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) @@ -292,7 +293,8 @@ def __init__( self.linear_method = linear_method self.model = OPTModel(config, linear_method) self.lm_head_weight = self.model.decoder.embed_tokens.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -305,13 +307,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index 6039b1cdc3534..86428e320e0f7 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -18,6 +18,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -256,7 +257,8 @@ def __init__( self.linear_method = linear_method self.model = 
OrionModel(config, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -269,13 +271,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index 039dc7a9b7675..ef70c823dc905 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -49,6 +49,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -240,7 +241,8 @@ def __init__(self, self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, bias=True) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -254,14 +256,18 @@ def forward( return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata, self.lm_head.bias) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - head = self.lm_head - next_tokens = self.sampler(head.weight, hidden_states, - sampling_metadata, head.bias) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index d4d5a4e8bb9a5..61ac2c6c605c6 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -19,6 +19,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -230,7 +231,8 @@ def __init__( self.linear_method = linear_method self.transformer = QWenModel(config, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -243,13 +245,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( 
self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 12e0feddcb7f1..6698f01b7c701 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -37,6 +37,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -300,11 +301,15 @@ def __init__( self.linear_method = linear_method self.model = Qwen2Model(config, linear_method) - if not config.tie_word_embeddings: + if config.tie_word_embeddings: + self.lm_head_weight = self.model.embed_tokens.weight + else: self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) + self.lm_head_weight = self.lm_head.weight - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -317,17 +322,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - if self.config.tie_word_embeddings: - lm_head_weight = self.model.embed_tokens.weight - else: - lm_head_weight = self.lm_head.weight - next_tokens = self.sampler(lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index c66f327beee7a..7624ca89ee670 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -33,6 +33,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead) @@ -238,7 +239,8 @@ def __init__( self.linear_method = linear_method self.model = StableLMEpochModel(config, linear_method) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size) - self.sampler = Sampler(config.vocab_size) + self.logits_processor = LogitsProcessor(config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -251,13 +253,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head.weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: torch.Tensor, + logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head.weight, 
hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index cfbb1bdb7909e..e418951a633ab 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -32,6 +32,7 @@ LinearMethodBase, QKVParallelLinear, RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding, ParallelLMHead, DEFAULT_VOCAB_PADDING_SIZE) @@ -254,7 +255,9 @@ def __init__(self, padding_size=DEFAULT_VOCAB_PADDING_SIZE, ) self.lm_head_weight = self.lm_head.weight - self.sampler = Sampler(self.unpadded_vocab_size, config.vocab_size) + self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, + config.vocab_size) + self.sampler = Sampler() def forward( self, @@ -267,13 +270,18 @@ def forward( input_metadata) return hidden_states + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + def sample( self, - hidden_states: Optional[torch.Tensor], + logits: Optional[torch.Tensor], sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, hidden_states, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index cfccbbb20adc5..347b9380f1113 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -613,9 +613,16 @@ def execute_model( input_metadata=input_metadata, ) + # Compute the logits. + logits = self.model.compute_logits(hidden_states, sampling_metadata) + + # Only perform sampling in the driver worker. + if not sampling_metadata.perform_sampling: + return None + # Sample the next token. 
output = self.model.sample( - hidden_states=hidden_states, + logits=logits, sampling_metadata=sampling_metadata, ) return output From 523e30ea0c5abcb447763dcd9a77b54d5c5f3239 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Wed, 20 Mar 2024 17:59:52 -0700 Subject: [PATCH 153/196] [BugFix] Hot fix in setup.py for neuron build (#3537) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 67575a0e04bf0..47cac5996f816 100644 --- a/setup.py +++ b/setup.py @@ -168,7 +168,7 @@ def build_extensions(self) -> None: def _is_cuda() -> bool: - return torch.version.cuda is not None + return torch.version.cuda is not None and not _is_neuron() def _is_hip() -> bool: From 6ebd02bdef1eb08f9a7a11253a26cd49b5fb6d2d Mon Sep 17 00:00:00 2001 From: ElizaWszola Date: Thu, 21 Mar 2024 07:20:04 +0100 Subject: [PATCH 154/196] [PREFIX CACHING FOLLOW UP] OrderedDict-based evictor (#3431) Co-authored-by: rsnm2 Co-authored-by: Luka --- vllm/core/evictor.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py index 9f401cba3fbea..92515468a8a1f 100644 --- a/vllm/core/evictor.py +++ b/vllm/core/evictor.py @@ -1,5 +1,5 @@ import enum -from typing import Dict +from typing import OrderedDict from abc import ABC, abstractmethod, abstractproperty from vllm.block import PhysicalTokenBlock @@ -58,27 +58,26 @@ class LRUEvictor(Evictor): """ def __init__(self): - self.free_table: Dict[int, PhysicalTokenBlock] = {} + self.free_table: OrderedDict[int, PhysicalTokenBlock] = OrderedDict() def __contains__(self, block_hash: int) -> bool: return block_hash in self.free_table - # TODO: The performance of this evict function can be optimized further. def evict(self) -> PhysicalTokenBlock: if len(self.free_table) == 0: raise ValueError("No usable cache memory left") - free_blocks = self.free_table.values() - # Get evicted block - evicted_block: PhysicalTokenBlock = next(iter(free_blocks)) - - for block in free_blocks: - if (block.last_accessed < evicted_block.last_accessed - or block.last_accessed == evicted_block.last_accessed and - block.num_hashed_tokens > evicted_block.num_hashed_tokens): + evicted_block = next(iter(self.free_table.values())) + # The blocks with the lowest timestamps should be placed consecutively + # at the start of OrderedDict. Loop through all these blocks to + # find the one with maximum number of hashed tokens. 
+ for _, block in self.free_table.items(): + if evicted_block.last_accessed < block.last_accessed: + break + if evicted_block.num_hashed_tokens < block.num_hashed_tokens: evicted_block = block - del self.free_table[evicted_block.block_hash] + self.free_table.pop(evicted_block.block_hash) evicted_block.computed = False return evicted_block @@ -91,7 +90,7 @@ def remove(self, block_hash: int) -> PhysicalTokenBlock: raise ValueError( "Attempting to remove block that's not in the evictor") block: PhysicalTokenBlock = self.free_table[block_hash] - del self.free_table[block_hash] + self.free_table.pop(block_hash) return block @property From 3bbff9e5ab964cf04897cebfc5e886a1113fef01 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Thu, 21 Mar 2024 17:49:06 +0900 Subject: [PATCH 155/196] Fix 1D query issue from `_prune_hidden_states` (#3539) --- vllm/model_executor/layers/logits_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py index baa113c342c28..e9d2a2708c1bb 100644 --- a/vllm/model_executor/layers/logits_processor.py +++ b/vllm/model_executor/layers/logits_processor.py @@ -77,7 +77,6 @@ def _prune_hidden_states( hidden_states: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> torch.Tensor: - hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) return hidden_states.index_select(0, sampling_metadata.selected_token_indices) From 4c07dd28c0ef8642735222e077935b55f4c98017 Mon Sep 17 00:00:00 2001 From: Lalit Pradhan <136452006+grandiose-pizza@users.noreply.github.com> Date: Thu, 21 Mar 2024 13:45:24 +0400 Subject: [PATCH 156/196] =?UTF-8?q?[=F0=9F=9A=80=20Ready=20to=20be=20merge?= =?UTF-8?q?d]=20Added=20support=20for=20Jais=20models=20(#3183)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + docs/source/models/supported_models.rst | 6 +- vllm/model_executor/models/__init__.py | 1 + vllm/model_executor/models/gpt2.py | 3 +- vllm/model_executor/models/jais.py | 351 ++++++++++++++++++++ vllm/transformers_utils/config.py | 1 + vllm/transformers_utils/configs/__init__.py | 2 + vllm/transformers_utils/configs/jais.py | 234 +++++++++++++ 8 files changed, 596 insertions(+), 3 deletions(-) create mode 100644 vllm/model_executor/models/jais.py create mode 100644 vllm/transformers_utils/configs/jais.py diff --git a/README.md b/README.md index f57c3f7862ed1..9d3f742225ea8 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ vLLM seamlessly supports many Hugging Face models, including the following archi - GPT-NeoX (`EleutherAI/gpt-neox-20b`, `databricks/dolly-v2-12b`, `stabilityai/stablelm-tuned-alpha-7b`, etc.) - InternLM (`internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc.) - InternLM2 (`internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc.) +- Jais (`core42/jais-13b`, `core42/jais-13b-chat`, `core42/jais-30b-v3`, `core42/jais-30b-chat-v3`, etc.) - LLaMA & LLaMA-2 (`meta-llama/Llama-2-70b-hf`, `lmsys/vicuna-13b-v1.3`, `young-geng/koala`, `openlm-research/open_llama_13b`, etc.) - Mistral (`mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc.) - Mixtral (`mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, etc.) 
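The README entry above advertises the new architecture; the diffs that follow register it in the docs table and the model registry and add the `jais.py` implementation plus its config class. Once those pieces land, a Jais checkpoint should load through the ordinary offline API. The snippet below is a minimal sketch for orientation rather than part of the patch; the checkpoint name, `trust_remote_code` flag, and prompt are illustrative assumptions.

```python
from vllm import LLM, SamplingParams

# Hypothetical smoke test for the newly registered JAISLMHeadModel architecture.
llm = LLM(model="core42/jais-13b-chat", trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.8, max_tokens=64)

outputs = llm.generate(["What is the capital city of the UAE?"], sampling_params)
for output in outputs:
    print(output.outputs[0].text)
```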
diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index 4019e0bbd90fb..af4eb81646ebe 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -66,7 +66,11 @@ Alongside each architecture, we include some popular models that use it. * - :code:`InternLM2ForCausalLM` - InternLM2 - :code:`internlm/internlm2-7b`, :code:`internlm/internlm2-chat-7b`, etc. - - + - + * - :code:`JAISLMHeadModel` + - Jais + - :code:`core42/jais-13b`, :code:`core42/jais-13b-chat`, :code:`core42/jais-30b-v3`, :code:`core42/jais-30b-chat-v3`, etc. + - * - :code:`LlamaForCausalLM` - LLaMA, LLaMA-2, Vicuna, Alpaca, Yi - :code:`meta-llama/Llama-2-13b-hf`, :code:`meta-llama/Llama-2-70b-hf`, :code:`openlm-research/open_llama_13b`, :code:`lmsys/vicuna-13b-v1.3`, :code:`01-ai/Yi-6B`, :code:`01-ai/Yi-34B`, etc. diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index bc3b6a582d53d..069830c4d7cb5 100755 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -27,6 +27,7 @@ "GPTNeoXForCausalLM": ("gpt_neox", "GPTNeoXForCausalLM"), "InternLMForCausalLM": ("llama", "LlamaForCausalLM"), "InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"), + "JAISLMHeadModel": ("jais", "JAISLMHeadModel"), "LlamaForCausalLM": ("llama", "LlamaForCausalLM"), # For decapoda-research/llama-* "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"), diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index 263727cac19ff..e75dda750cb26 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -242,8 +242,7 @@ def sample( logits: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.lm_head_weight, logits, - sampling_metadata) + next_tokens = self.sampler(logits, sampling_metadata) return next_tokens def load_weights(self, diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py new file mode 100644 index 0000000000000..74c8e7f963026 --- /dev/null +++ b/vllm/model_executor/models/jais.py @@ -0,0 +1,351 @@ +# coding=utf-8 +# Adapted from +# https://huggingface.co/core42/jais-30b-chat-v3/blob/main/modeling_jais.py +# Copyright 2023 The vLLM team. +# Copyright 2023 the Jais authors and HuggingFace Inc. team. All rights +# reserved. +# Copyright 2023 Cerebras Systems. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Inference-only Jais model compatible with HuggingFace weights.""" + +import math +from typing import List, Optional, Tuple + +import torch +from torch import nn +from vllm.transformers_utils.configs import JAISConfig + +from vllm.model_executor.input_metadata import InputMetadata +from vllm.model_executor.layers.attention import Attention +from vllm.model_executor.layers.linear import ( + ColumnParallelLinear, + LinearMethodBase, + QKVParallelLinear, + RowParallelLinear, +) +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.layers.vocab_parallel_embedding import ( + VocabParallelEmbedding, ) +from vllm.model_executor.parallel_utils.parallel_state import ( + get_tensor_model_parallel_world_size, + get_tensor_model_parallel_rank, +) +from vllm.model_executor.weight_utils import ( + default_weight_loader, + hf_model_weights_iterator, +) +from vllm.sequence import SamplerOutput +from vllm.model_executor.sampling_metadata import SamplingMetadata + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class SwiGLUActivation(nn.Module): + + def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor: + return x1 * nn.functional.silu(x2) + + +def _get_alibi_slopes(n): + + def get_slopes_power_of_2(n): + start = 2**(-(2**-(math.log2(n) - 3))) + ratio = start + return [start * ratio**i for i in range(n)] + + if math.log2(n).is_integer(): + return get_slopes_power_of_2(n) + else: + closest_power_of_2 = 2**math.floor(math.log2(n)) + return (get_slopes_power_of_2(closest_power_of_2) + _get_alibi_slopes( + 2 * closest_power_of_2)[0::2][:n - closest_power_of_2]) + + +class JAISAttention(nn.Module): + + def __init__( + self, + config: JAISConfig, + linear_method: Optional[LinearMethodBase] = None, + ): + super().__init__() + self.hidden_size = config.hidden_size + total_num_heads = config.num_attention_heads + tensor_model_parallel_world_size = ( + get_tensor_model_parallel_world_size()) + assert total_num_heads % tensor_model_parallel_world_size == 0 + self.num_heads = total_num_heads // tensor_model_parallel_world_size + self.head_dim = self.hidden_size // total_num_heads + if hasattr(config, "scale_qk_dot_by_d"): + config.mup_scale_qk_dot_by_d = config.scale_qk_dot_by_d + self.attn_scale_power = 1.0 if config.mup_scale_qk_dot_by_d else 0.5 + self.scale = self.head_dim**-self.attn_scale_power + + self.c_attn = QKVParallelLinear( + self.hidden_size, + self.head_dim, + total_num_heads, + bias=True, + linear_method=linear_method, + ) + self.c_proj = RowParallelLinear( + self.hidden_size, + self.hidden_size, + bias=True, + linear_method=linear_method, + ) + + tp_rank = get_tensor_model_parallel_rank() + head_start = tp_rank * self.num_heads + head_end = (tp_rank + 1) * self.num_heads + alibi_slopes = _get_alibi_slopes(total_num_heads) + alibi_slopes = alibi_slopes[head_start:head_end] + self.attn = Attention( + self.num_heads, + self.head_dim, + scale=self.scale, + alibi_slopes=alibi_slopes, + ) + + def forward( + self, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + ) -> torch.Tensor: + qkv, _ = self.c_attn(hidden_states) + q, k, v = qkv.chunk(chunks=3, dim=-1) + key_cache, value_cache = kv_cache + attn_output = self.attn(q, k, v, key_cache, value_cache, + input_metadata) + attn_output, _ = self.c_proj(attn_output) + return attn_output + + +class JAISMLP(nn.Module): + + def __init__( + self, + intermediate_size: int, + config: JAISConfig, + linear_method: 
Optional[LinearMethodBase] = None, + ): + super().__init__() + hidden_size = config.hidden_size + self.swiglu = config.activation_function == "swiglu" + self.c_fc = ColumnParallelLinear( + hidden_size, + intermediate_size, + bias=True, + linear_method=linear_method, + ) + self.c_fc2 = (ColumnParallelLinear( + hidden_size, + intermediate_size, + bias=True, + linear_method=linear_method, + ) if self.swiglu else None) + self.c_proj = RowParallelLinear( + intermediate_size, + hidden_size, + bias=True, + linear_method=linear_method, + ) + + self.act = SwiGLUActivation() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + if self.swiglu: + hidden_states2, _ = self.c_fc2(hidden_states) + hidden_states, _ = self.c_fc(hidden_states) + hidden_states = (self.act(hidden_states, hidden_states2) + if self.swiglu else self.act(hidden_states)) + hidden_states, _ = self.c_proj(hidden_states) + return hidden_states + + +class JAISBlock(nn.Module): + + def __init__( + self, + config: JAISConfig, + linear_method: Optional[LinearMethodBase] = None, + ): + super().__init__() + hidden_size = config.hidden_size + inner_dim = (config.n_inner if config.n_inner is not None else 4 * + hidden_size) + + self.ln_1 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon) + self.attn = JAISAttention(config, linear_method) + self.ln_2 = nn.LayerNorm(hidden_size, eps=config.layer_norm_epsilon) + self.mlp = JAISMLP(inner_dim, config, linear_method) + + def forward( + self, + hidden_states: torch.Tensor, + kv_cache: KVCache, + input_metadata: InputMetadata, + ) -> torch.Tensor: + residual = hidden_states + hidden_states = self.ln_1(hidden_states) + attn_output = self.attn( + hidden_states=hidden_states, + kv_cache=kv_cache, + input_metadata=input_metadata, + ) + # residual connection + hidden_states = attn_output + residual + + residual = hidden_states + hidden_states = self.ln_2(hidden_states) + feed_forward_hidden_states = self.mlp(hidden_states) + # residual connection + hidden_states = residual + feed_forward_hidden_states + return hidden_states + + +class JAISModel(nn.Module): + + def __init__( + self, + config: JAISConfig, + linear_method: Optional[LinearMethodBase] = None, + ): + super().__init__() + self.config = config + assert not config.add_cross_attention + assert not config.scale_attn_by_inverse_layer_idx + assert not config.reorder_and_upcast_attn + self.embed_dim = config.hidden_size + self.wte = VocabParallelEmbedding(config.vocab_size, self.embed_dim) + self.wpe = (nn.Embedding(config.max_position_embeddings, + self.embed_dim) + if config.position_embedding_type != "alibi" else None) + if hasattr(config, "embeddings_scale"): + self.embeddings_scale = config.embeddings_scale + else: + self.embeddings_scale = config.mup_embeddings_scale + self.h = nn.ModuleList([ + JAISBlock(config, linear_method) + for _ in range(config.num_hidden_layers) + ]) + self.ln_f = nn.LayerNorm(self.embed_dim, eps=config.layer_norm_epsilon) + + def forward( + self, + input_ids: torch.Tensor, + position_ids: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + inputs_embeds = self.wte(input_ids) + if self.wpe is not None: + position_embeds = self.wpe(position_ids) + hidden_states = inputs_embeds + position_embeds + else: + hidden_states = inputs_embeds + hidden_states *= torch.tensor(float(self.embeddings_scale), + dtype=hidden_states.dtype) + + for i in range(len(self.h)): + layer = self.h[i] + hidden_states = layer(hidden_states, kv_caches[i], input_metadata) + + 
hidden_states = self.ln_f(hidden_states) + return hidden_states + + +class JAISLMHeadModel(nn.Module): + + def __init__( + self, + config: JAISConfig, + linear_method: Optional[LinearMethodBase] = None, + ): + super().__init__() + self.config = config + self.linear_method = linear_method + self.transformer = JAISModel(config, linear_method) + self.lm_head_weight = self.transformer.wte.weight + if hasattr(config, "width_scale"): + self.output_logits_scale = config.width_scale + else: + self.output_logits_scale = (config.mup_output_alpha * + config.mup_width_scale) + self.logits_processor = LogitsProcessor(vocab_size=config.vocab_size, + scale=self.output_logits_scale) + self.sampler = Sampler() + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + kv_caches: List[KVCache], + input_metadata: InputMetadata, + ) -> torch.Tensor: + hidden_states = self.transformer(input_ids, positions, kv_caches, + input_metadata) + return hidden_states + + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(self.lm_head_weight, hidden_states, + sampling_metadata) + return logits + + def sample( + self, + logits: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(logits, sampling_metadata) + return next_tokens + + def load_weights( + self, + model_name_or_path: str, + cache_dir: Optional[str] = None, + load_format: str = "auto", + revision: Optional[str] = None, + ): + params_dict = dict(self.named_parameters(remove_duplicate=False)) + for name, loaded_weight in hf_model_weights_iterator( + model_name_or_path, cache_dir, load_format, revision): + if "lm_head.weight" in name: + # GPT-2 ties the weights of the embedding layer and the final + # linear layer. + continue + if ".attn.bias" in name or ".attn.masked_bias" in name: + # Skip attention mask. + # NOTE: "c_attn.bias" should not be skipped. + continue + if "relative_pe" in name: + continue + if not name.startswith("transformer."): + name = "transformer." + name + param = params_dict[name] + # The HF's GPT-2 implementation uses Conv1D instead of Linear. + # Because of this, we need to transpose the weights. + # Note(zhuohan): the logic below might break quantized models. + for conv1d_weight_name in ["c_attn", "c_proj", "c_fc"]: + if conv1d_weight_name not in name: + continue + if not name.endswith(".weight"): + continue + loaded_weight = loaded_weight.t() + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) \ No newline at end of file diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 5e1f0439aec51..081e81768b236 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -10,6 +10,7 @@ "RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct) "RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct) "starcoder2": Starcoder2Config, + "jais": JAISConfig, } diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index 4966526f15184..150ee2ce97ad5 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -5,10 +5,12 @@ # `FalconConfig` class from the official HuggingFace transformers library. 
from vllm.transformers_utils.configs.falcon import RWConfig from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config +from vllm.transformers_utils.configs.jais import JAISConfig __all__ = [ "ChatGLMConfig", "MPTConfig", "RWConfig", "Starcoder2Config", + "JAISConfig", ] diff --git a/vllm/transformers_utils/configs/jais.py b/vllm/transformers_utils/configs/jais.py new file mode 100644 index 0000000000000..94f438716f8bf --- /dev/null +++ b/vllm/transformers_utils/configs/jais.py @@ -0,0 +1,234 @@ +# coding=utf-8 +# Copyright 2023 The OpenAI Team Authors and HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# Copyright 2023 Cerebras Systems. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""JAIS configuration""" + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + +logger = logging.get_logger(__name__) + + +class JAISConfig(PretrainedConfig): + """ + This is the configuration class to store the configuration of a + [`JAISModel`]. It is used to instantiate a JAIS model according to the + specified arguments, defining the model architecture. + + Configuration objects inherit from [`PretrainedConfig`] and can be used + to control the model outputs. Read the documentation from + [`PretrainedConfig`] for more information. + + + Args: + vocab_size (`int`, *optional*, defaults to 50257): + Vocabulary size of the JAIS model. Defines the number of different + tokens that can be represented by the + `inputs_ids` passed when calling [`JAISModel`]. + n_positions (`int`, *optional*, defaults to 1024): + The maximum sequence length that this model might ever be used + with. Typically set this to something large just in case + (e.g., 512 or 1024 or 2048). + n_embd (`int`, *optional*, defaults to 768): + Dimensionality of the embeddings and hidden states. + n_layer (`int`, *optional*, defaults to 12): + Number of hidden layers in the Transformer encoder. + n_head (`int`, *optional*, defaults to 12): + Number of attention heads for each attention layer in the + Transformer encoder. + n_inner (`int`, *optional*, defaults to None): + Dimensionality of the inner feed-forward layers. `None` will set + it to 4 times n_embd + activation_function (`str`, *optional*, defaults to `"gelu"`): + Activation function, to be selected in the list + `["relu", "silu", "gelu", "tanh", "gelu_new", "swiglu"]`. + resid_pdrop (`float`, *optional*, defaults to 0.1): + The dropout probability for all fully connected layers in + the embeddings, encoder, and pooler. + embd_pdrop (`float`, *optional*, defaults to 0.1): + The dropout ratio for the embeddings. + attn_pdrop (`float`, *optional*, defaults to 0.1): + The dropout ratio for the attention. + layer_norm_epsilon (`float`, *optional*, defaults to 1e-5): + The epsilon to use in the layer normalization layers. + initializer_range (`float`, *optional*, defaults to 0.02): + The standard deviation of the truncated_normal_initializer for + initializing all weight matrices. 
+ scale_attn_weights (`bool`, *optional*, defaults to `True`): + Scale attention weights by dividing by sqrt(hidden_size).. + use_cache (`bool`, *optional*, defaults to `True`): + Whether or not the model should return the last key/values + attentions (not used by all models). + scale_attn_by_inverse_layer_idx (`bool`, *optional*, + defaults to `False`): + Whether to additionally scale attention weights by + `1 / layer_idx + 1`. + reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`): + Whether to scale keys (K) prior to computing attention + (dot-product) + and upcast attention dot-product/softmax to float() when training + with mixed precision. + position_embedding_type (`str`, *optional*, defaults to `"learned"`): + Positional embedding can be either `"alibi"` or `"learned"`. + mup_width_scale (`float`, *optional*, defaults to 1.0): + muP parameter to scale learning rate and initializers. Calculated + as (`d_model,0 / d_model`), where + `d_model` is the model's width and `d_model,0` is the proxy + model's width. + mup_embeddings_scale (`float`, *optional*, defaults to 1.0): + muP parameter to scale token and position embeddings. + mup_output_alpha (`float`, *optional*, defaults to 1.0): + muP parameter to scale output logits + (`output_logits_scale = mup_output_alpha * mup_width_scale`). + mup_scale_qk_dot_by_d (`bool`, *optional*, defaults to `False`): + Scale attention weights by dividing by hidden_size instead of + sqrt(hidden_size). Need to set scale_attn_weights to `True` as + well. + alibi_scaling (`Dict`, *optional*): + Dictionary containing the scaling configuration for ALiBi + embeddings. Currently only supports linear + scaling strategy. Can specify either the scaling `factor` (must be + a float greater than 1) for fixed scaling + or `train_seq_len` for dynamic scaling on input samples with + sequence length > `train_seq_len`. The expected + formats are `{"type": strategy name, "factor": scaling factor}` or + `{"type": strategy name, + "train_seq_len": training sequence length}`. + architectures (`List`, *optional*, defaults to ['JAISLMHeadModel']): + architecture names for Jais. 
+ + Example: + + ```python + >>> from transformers import JAISConfig, JAISModel + + >>> # Initializing a JAIS configuration + >>> configuration = JAISConfig() + + >>> # Initializing a model (with random weights) from the configuration + >>> model = JAISModel(configuration) + + >>> # Accessing the model configuration + >>> configuration = model.config + ```""" + + model_type = "jais" + keys_to_ignore_at_inference = ["past_key_values"] + attribute_map = { + "hidden_size": "n_embd", + "max_position_embeddings": "n_positions", + "num_attention_heads": "n_head", + "num_hidden_layers": "n_layer", + } + + def __init__( + self, + vocab_size=50257, + n_positions=1024, + n_embd=768, + n_layer=12, + n_head=12, + n_inner=None, + activation_function="gelu_new", + resid_pdrop=0.1, + embd_pdrop=0.1, + attn_pdrop=0.1, + layer_norm_epsilon=1e-5, + initializer_range=0.02, + scale_attn_weights=True, + use_cache=True, + bos_token_id=50256, + eos_token_id=50256, + scale_attn_by_inverse_layer_idx=False, + reorder_and_upcast_attn=False, + position_embedding_type="learned", + mup_width_scale=1.0, + mup_embeddings_scale=1.0, + mup_output_alpha=1.0, + mup_scale_qk_dot_by_d=False, + alibi_scaling=None, + architectures=None, + **kwargs, + ): + self.vocab_size = vocab_size + self.n_positions = n_positions + self.n_embd = n_embd + self.n_layer = n_layer + self.n_head = n_head + self.n_inner = n_inner + self.activation_function = activation_function + self.resid_pdrop = resid_pdrop + self.embd_pdrop = embd_pdrop + self.attn_pdrop = attn_pdrop + self.layer_norm_epsilon = layer_norm_epsilon + self.initializer_range = initializer_range + self.scale_attn_weights = scale_attn_weights + self.use_cache = use_cache + self.scale_attn_by_inverse_layer_idx = scale_attn_by_inverse_layer_idx + self.reorder_and_upcast_attn = reorder_and_upcast_attn + + self.bos_token_id = bos_token_id + self.eos_token_id = eos_token_id + + self.position_embedding_type = position_embedding_type + self.mup_width_scale = mup_width_scale + self.mup_embeddings_scale = mup_embeddings_scale + self.mup_output_alpha = mup_output_alpha + self.mup_scale_qk_dot_by_d = mup_scale_qk_dot_by_d + + self.alibi_scaling = alibi_scaling + self._alibi_scaling_validation() + if architectures is None: + architectures = ["JAISLMHeadModel"] + + super().__init__( + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + architectures=architectures, + **kwargs, + ) + + def _alibi_scaling_validation(self): + """ + Validate the `alibi_scaling` configuration. 
+ """ + if self.alibi_scaling is None: + return + + if (not isinstance(self.alibi_scaling, dict) + or len(self.alibi_scaling) != 2): + raise ValueError( + "`alibi_scaling` must be a dictionary with two fields," + "`type` and `factor` or `type` and `train_seq_len`, " + f"got {self.alibi_scaling}") + alibi_scaling_type = self.alibi_scaling.get("type", None) + alibi_scaling_factor = self.alibi_scaling.get("factor", None) + alibi_dynamic_scaling = self.alibi_scaling.get("train_seq_len", None) + if alibi_scaling_type is None or alibi_scaling_type != "linear": + raise ValueError(f"`alibi_scaling`'s type field must be 'linear'," + f"got {alibi_scaling_type}") + if (alibi_scaling_factor is not None + and not isinstance(alibi_scaling_factor, float) + or alibi_scaling_factor <= 1.0): + raise ValueError( + f"`alibi_scaling`'s factor field must be a float > 1.0," + f"got {alibi_scaling_factor}") + if (alibi_dynamic_scaling is not None + and not isinstance(alibi_dynamic_scaling, int) + or alibi_dynamic_scaling <= 1): + raise ValueError( + f"`alibi_scaling`'s `train_seq_len` field must be an" + f"integer > 1, got {alibi_dynamic_scaling}") From 865732342b4e3b8a4ef38f28a2a5bdb87cf3f970 Mon Sep 17 00:00:00 2001 From: Roy Date: Thu, 21 Mar 2024 18:07:48 +0800 Subject: [PATCH 157/196] [Misc][Log] Add log for tokenizer length not equal to vocabulary size (#3500) --- vllm/engine/llm_engine.py | 8 ++++++++ vllm/entrypoints/openai/serving_engine.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 2280481cca9cb..b726cdd7a2048 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -169,6 +169,14 @@ def _init_tokenizer(self, **tokenizer_init_kwargs): self.tokenizer: BaseTokenizerGroup = get_tokenizer_group( self.parallel_config.tokenizer_pool_config, **init_kwargs) + if len(self.get_tokenizer()) != self.model_config.get_vocab_size(): + logger.warning( + f"The tokenizer's vocabulary size {len(self.get_tokenizer())}" + f" does not match the model's vocabulary size " + f"{self.model_config.get_vocab_size()}. This might " + f"cause an error in decoding. Please change config.json " + "to match the tokenizer's vocabulary size.") + def _verify_args(self) -> None: self.model_config.verify_with_parallel_config(self.parallel_config) self.cache_config.verify_with_parallel_config(self.parallel_config) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 2db884945c491..976046beec245 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -68,6 +68,14 @@ async def _post_init(self): tokenizer_mode=engine_model_config.tokenizer_mode, trust_remote_code=engine_model_config.trust_remote_code) + if len(self.tokenizer) != engine_model_config.get_vocab_size(): + logger.warning( + f"The tokenizer's vocabulary size {len(self.tokenizer)}" + f" does not match the model's vocabulary size " + f"{engine_model_config.get_vocab_size()}. This might " + f"cause an error in decoding. Please change config.json " + "to match the tokenizer's vocabulary size.") + async def show_available_models(self) -> ModelList: """Show available models. 
Right now we only have one model.""" model_cards = [ From c188ecb080501c5ccb34bbd6542978284c547122 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 21 Mar 2024 07:58:12 -0700 Subject: [PATCH 158/196] [Misc] Bump up transformers to v4.39.0 & Remove StarCoder2Config (#3551) Co-authored-by: Roy Co-authored-by: Roger Meier --- requirements-rocm.txt | 2 +- requirements.txt | 2 +- vllm/model_executor/models/starcoder2.py | 8 +-- vllm/transformers_utils/config.py | 10 ---- vllm/transformers_utils/configs/__init__.py | 2 - vllm/transformers_utils/configs/starcoder2.py | 55 ------------------- 6 files changed, 3 insertions(+), 76 deletions(-) delete mode 100644 vllm/transformers_utils/configs/starcoder2.py diff --git a/requirements-rocm.txt b/requirements-rocm.txt index c30479e40f521..07d94cd94f5fa 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -7,7 +7,7 @@ ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy tokenizers>=0.15.0 -transformers >= 4.38.0 # Required for Gemma. +transformers >= 4.39.0 # Required for StarCoder2. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. diff --git a/requirements.txt b/requirements.txt index c9a5bd6619402..e136defad4943 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy torch == 2.1.2 -transformers >= 4.38.0 # Required for Gemma. +transformers >= 4.39.0 # Required for StarCoder2. xformers == 0.0.23.post1 # Required for CUDA 12.1. fastapi uvicorn[standard] diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index e418951a633ab..e5003361bdf2a 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -22,6 +22,7 @@ import torch from torch import nn +from transformers import Starcoder2Config from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.sampling_metadata import SamplingMetadata @@ -42,13 +43,6 @@ hf_model_weights_iterator) from vllm.sequence import SamplerOutput -try: - from transformers import Starcoder2Config -except ImportError: - # fallback to PretrainedConfig - # NOTE: Please install transformers from source or use transformers>=4.39.0 - from transformers import PretrainedConfig as Starcoder2Config - KVCache = Tuple[torch.Tensor, torch.Tensor] diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 081e81768b236..dc226248910e2 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -9,7 +9,6 @@ "mpt": MPTConfig, "RefinedWeb": RWConfig, # For tiiuae/falcon-40b(-instruct) "RefinedWebModel": RWConfig, # For tiiuae/falcon-7b(-instruct) - "starcoder2": Starcoder2Config, "jais": JAISConfig, } @@ -18,15 +17,6 @@ def get_config(model: str, trust_remote_code: bool, revision: Optional[str] = None, code_revision: Optional[str] = None) -> PretrainedConfig: - # FIXME(woosuk): This is a temporary fix for StarCoder2. - # Remove this when the model is supported by HuggingFace transformers. 
- if "bigcode" in model and "starcoder2" in model: - config_class = _CONFIG_REGISTRY["starcoder2"] - config = config_class.from_pretrained(model, - revision=revision, - code_revision=code_revision) - return config - try: config = AutoConfig.from_pretrained( model, diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index 150ee2ce97ad5..6fed2fab8c438 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -4,13 +4,11 @@ # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the # `FalconConfig` class from the official HuggingFace transformers library. from vllm.transformers_utils.configs.falcon import RWConfig -from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config from vllm.transformers_utils.configs.jais import JAISConfig __all__ = [ "ChatGLMConfig", "MPTConfig", "RWConfig", - "Starcoder2Config", "JAISConfig", ] diff --git a/vllm/transformers_utils/configs/starcoder2.py b/vllm/transformers_utils/configs/starcoder2.py deleted file mode 100644 index 2879cd0445275..0000000000000 --- a/vllm/transformers_utils/configs/starcoder2.py +++ /dev/null @@ -1,55 +0,0 @@ -from transformers import PretrainedConfig - - -class Starcoder2Config(PretrainedConfig): - model_type = "starcoder2" - keys_to_ignore_at_inference = ["past_key_values"] - - def __init__( - self, - vocab_size=49152, - hidden_size=3072, - intermediate_size=12288, - num_hidden_layers=30, - num_attention_heads=24, - num_key_value_heads=2, - hidden_act="gelu_pytorch_tanh", - max_position_embeddings=4096, - initializer_range=0.018042, - norm_epsilon=1e-5, - use_cache=True, - bos_token_id=50256, - eos_token_id=50256, - rope_theta=10000.0, - sliding_window=None, - attention_dropout=0.0, - residual_dropout=0.0, - embedding_dropout=0.0, - use_bias=True, - **kwargs, - ): - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.sliding_window = sliding_window - self.use_bias = use_bias - self.num_key_value_heads = num_key_value_heads - self.hidden_act = hidden_act - self.initializer_range = initializer_range - self.norm_epsilon = norm_epsilon - self.use_cache = use_cache - self.rope_theta = rope_theta - self.attention_dropout = attention_dropout - self.residual_dropout = residual_dropout - self.embedding_dropout = embedding_dropout - - super().__init__( - bos_token_id=bos_token_id, - eos_token_id=eos_token_id, - **kwargs, - ) - if self.architectures is None: - self.architectures = ['Starcoder2ForCausalLM'] From b7050ca7df640326f53e89f518f3ee045dfbbdef Mon Sep 17 00:00:00 2001 From: Taemin Lee Date: Fri, 22 Mar 2024 05:16:57 +0900 Subject: [PATCH 159/196] [BugFix] gemma loading after quantization or LoRA. (#3553) --- vllm/model_executor/models/gemma.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index fd3dbe798cd8e..fa8ce60e74056 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -340,6 +340,10 @@ def load_weights(self, weight_loader(param, loaded_weight, shard_id) break else: + # lm_head is not used in vllm as it is tied with embed_token. + # To prevent errors, skip loading lm_head.weight. + if "lm_head.weight" in name: + continue # Skip loading extra bias for GPTQ models. 
if name.endswith(".bias") and name not in params_dict: continue From ea5f14e6ffafcb9c660a3eea5a935122aa9f84ae Mon Sep 17 00:00:00 2001 From: Roy Date: Fri, 22 Mar 2024 08:18:58 +0800 Subject: [PATCH 160/196] [Bugfix][Model] Fix Qwen2 (#3554) --- tests/models/test_models.py | 1 + vllm/model_executor/models/qwen2.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index fb567e837d281..81189e25d4f1c 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -20,6 +20,7 @@ "stabilityai/stablelm-3b-4e1t", "allenai/OLMo-1B", "bigcode/starcoder2-3b", + "Qwen/Qwen1.5-0.5B", ] diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 6698f01b7c701..49c2a8b732fed 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -349,7 +349,7 @@ def load_weights(self, ("gate_up_proj", "gate_proj", 0), ("gate_up_proj", "up_proj", 1), ] - params_dict = dict(self.named_parameters()) + params_dict = dict(self.named_parameters(remove_duplicate=False)) for name, loaded_weight in hf_model_weights_iterator( model_name_or_path, cache_dir, load_format, revision): if "rotary_emb.inv_freq" in name: From e90fc21f2eda7e53f692398ee2c0cb5a0ac19693 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Thu, 21 Mar 2024 18:22:17 -0700 Subject: [PATCH 161/196] [Hardware][Neuron] Refactor neuron support (#3471) --- examples/offline_inference_neuron.py | 5 +- tests/lora/test_worker.py | 2 +- tests/spec_decode/test_spec_decode_worker.py | 18 +- tests/spec_decode/utils.py | 2 +- tests/worker/test_swap.py | 2 +- vllm/config.py | 17 +- vllm/engine/async_llm_engine.py | 7 +- vllm/engine/llm_engine.py | 6 +- vllm/executor/gpu_executor.py | 18 +- vllm/executor/neuron_executor.py | 80 +++++ vllm/executor/ray_gpu_executor.py | 18 +- vllm/lora/layers.py | 4 +- vllm/lora/lora.py | 4 +- vllm/lora/models.py | 4 +- vllm/model_executor/__init__.py | 3 +- vllm/model_executor/input_metadata.py | 7 +- .../model_executor/layers/logits_processor.py | 11 +- vllm/model_executor/layers/sampler.py | 2 +- vllm/model_executor/models/__init__.py | 15 +- vllm/model_executor/models/neuron/llama.py | 86 ------ vllm/model_executor/models/neuron/mistral.py | 89 ------ vllm/model_executor/neuron_model_loader.py | 111 +++++-- vllm/model_executor/sampling_metadata.py | 8 +- vllm/model_executor/utils.py | 17 -- vllm/spec_decode/metrics.py | 4 +- vllm/spec_decode/multi_step_worker.py | 4 +- vllm/spec_decode/spec_decode_worker.py | 6 +- vllm/utils.py | 63 +++- vllm/worker/cache_engine.py | 13 +- vllm/worker/model_runner.py | 82 ++--- vllm/worker/neuron_model_runner.py | 287 ++++++++++++++++++ vllm/worker/neuron_worker.py | 165 +--------- vllm/worker/worker.py | 4 +- 33 files changed, 615 insertions(+), 549 deletions(-) create mode 100644 vllm/executor/neuron_executor.py delete mode 100644 vllm/model_executor/models/neuron/llama.py delete mode 100755 vllm/model_executor/models/neuron/mistral.py create mode 100644 vllm/worker/neuron_model_runner.py diff --git a/examples/offline_inference_neuron.py b/examples/offline_inference_neuron.py index da8874abd92a2..5ecbbf020ab8b 100755 --- a/examples/offline_inference_neuron.py +++ b/examples/offline_inference_neuron.py @@ -12,7 +12,7 @@ # Create an LLM. 
llm = LLM( - model="openlm-research/open_llama_3b", + model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", max_num_seqs=8, # The max_model_len and block_size arguments are required to be same as # max sequence length when targeting neuron device. @@ -24,7 +24,8 @@ # The device can be automatically detected when AWS Neuron SDK is installed. # The device argument can be either unspecified for automated detection, # or explicitly assigned. - device="neuron") + device="neuron", + tensor_parallel_size=2) # Generate texts from the prompts. The output is a list of RequestOutput objects # that contain the prompt, generated text, and other information. outputs = llm.generate(prompts, sampling_params) diff --git a/tests/lora/test_worker.py b/tests/lora/test_worker.py index e4538de35169b..8e640ea2bac49 100644 --- a/tests/lora/test_worker.py +++ b/tests/lora/test_worker.py @@ -33,7 +33,7 @@ def test_worker_apply_lora(sql_lora_files): max_loras=32), distributed_init_method=f"file://{tempfile.mkstemp()[1]}", ) - worker.init_model() + worker.init_device() worker.load_model() worker.model_runner.set_active_loras([], LoRAMapping([], [])) diff --git a/tests/spec_decode/test_spec_decode_worker.py b/tests/spec_decode/test_spec_decode_worker.py index bfc69e01e3eb9..39c3f18b20bb3 100644 --- a/tests/spec_decode/test_spec_decode_worker.py +++ b/tests/spec_decode/test_spec_decode_worker.py @@ -71,7 +71,7 @@ def test_correctly_calls_target_model(k: int, batch_size: int): worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, metrics_collector) - worker.init_model() + worker.init_device() vocab_size = 32_000 @@ -151,7 +151,7 @@ def test_correctly_calls_rejection_sampler(k: int, batch_size: int): worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, metrics_collector) - worker.init_model() + worker.init_device() proposal_token_ids = torch.randint(low=0, high=vocab_size, @@ -230,7 +230,7 @@ def test_correctly_formats_output(k: int, batch_size: int): worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, metrics_collector) - worker.init_model() + worker.init_device() proposal_token_ids = torch.randint(low=0, high=vocab_size, @@ -342,7 +342,7 @@ def test_collects_metrics(k: int, batch_size: int, returns_metrics: bool): worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, metrics_collector) - worker.init_model() + worker.init_device() proposal_token_ids = torch.randint(low=0, high=vocab_size, @@ -486,8 +486,8 @@ def test_empty_input_batch(k: int, batch_size: int): @torch.inference_mode() -def test_init_model(): - """Verify SpecDecodeWorker invokes proposer/scorer worker init_model, as +def test_init_device(): + """Verify SpecDecodeWorker invokes proposer/scorer worker init_device, as well as other GPU initialization. 
""" draft_worker = mock_worker(cls=MultiStepWorker) @@ -499,11 +499,11 @@ def test_init_model(): worker = SpecDecodeWorker(draft_worker, target_worker, rejection_sampler, metrics_collector) - worker.init_model() + worker.init_device() - draft_worker.init_model.assert_called_once() + draft_worker.init_device.assert_called_once() - target_worker.init_model.assert_called_once() + target_worker.init_device.assert_called_once() metrics_collector.init_gpu_tensors.assert_called_once() rejection_sampler.init_gpu_tensors.assert_called_once() diff --git a/tests/spec_decode/utils.py b/tests/spec_decode/utils.py index 997093988c0eb..b7e9edbea88e2 100644 --- a/tests/spec_decode/utils.py +++ b/tests/spec_decode/utils.py @@ -123,7 +123,7 @@ def create_worker(cls: type, is_driver_worker=is_driver_worker, ) - worker.init_model() + worker.init_device() worker.load_model() cache_config.num_gpu_blocks = num_gpu_blocks diff --git a/tests/worker/test_swap.py b/tests/worker/test_swap.py index 35630a06a900f..5548b2c795222 100644 --- a/tests/worker/test_swap.py +++ b/tests/worker/test_swap.py @@ -30,7 +30,7 @@ def test_swap() -> None: ) # Initialize the worker. - worker.init_model() + worker.init_device() worker.load_model() worker.init_cache_engine(cache_config) worker.warm_up_model() diff --git a/vllm/config.py b/vllm/config.py index b769ecdce8808..a86114f35e916 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -474,15 +474,7 @@ def __init__( placement_group: Optional["PlacementGroup"] = None, ) -> None: self.pipeline_parallel_size = pipeline_parallel_size - if is_neuron(): - # For Neuron device support, here we assign TP=1 to avoid sharding - # within vLLM directly. Transformer-neuronx would take - # neuron_tp_degree attribute, and distribute the workload - # to multiple NeuronCores. - self.tensor_parallel_size = 1 - self.neuron_tp_degree = tensor_parallel_size - else: - self.tensor_parallel_size = tensor_parallel_size + self.tensor_parallel_size = tensor_parallel_size self.worker_use_ray = worker_use_ray self.max_parallel_loading_workers = max_parallel_loading_workers self.disable_custom_all_reduce = disable_custom_all_reduce @@ -491,8 +483,7 @@ def __init__( self.placement_group = placement_group self.world_size = pipeline_parallel_size * self.tensor_parallel_size - # Ray worker is not supported for Neuron backend. - if self.world_size > 1 and not is_neuron(): + if self.world_size > 1: self.worker_use_ray = True self._verify_args() @@ -591,10 +582,6 @@ def __init__(self, device: str = "auto") -> None: # Set device with device type self.device = torch.device(self.device_type) - @property - def is_neuron(self): - return self.device_type == "neuron" - @dataclass class LoRAConfig: diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 8bcd1e0ede6e5..1a463ab1baae7 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -325,7 +325,12 @@ def from_engine_args(cls, # Create the engine configs. 
engine_configs = engine_args.create_engine_configs() parallel_config = engine_configs[2] - if parallel_config.worker_use_ray or engine_args.engine_use_ray: + device_config = engine_configs[4] + + if device_config.device_type == "neuron": + raise NotImplementedError("Neuron is not supported for " + "async engine yet.") + elif parallel_config.worker_use_ray or engine_args.engine_use_ray: initialize_ray_cluster(parallel_config) from vllm.executor.ray_gpu_executor import RayGPUExecutorAsync executor_class = RayGPUExecutorAsync diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index b726cdd7a2048..7247828418da5 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -125,9 +125,13 @@ def from_engine_args(cls, engine_args: EngineArgs) -> "LLMEngine": # Create the engine configs. engine_configs = engine_args.create_engine_configs() parallel_config = engine_configs[2] + device_config = engine_configs[4] # Initialize the cluster and specify the executor class. - if parallel_config.worker_use_ray: + if device_config.device_type == "neuron": + from vllm.executor.neuron_executor import NeuronExecutor + executor_class = NeuronExecutor + elif parallel_config.worker_use_ray: initialize_ray_cluster(parallel_config) from vllm.executor.ray_gpu_executor import RayGPUExecutor executor_class = RayGPUExecutor diff --git a/vllm/executor/gpu_executor.py b/vllm/executor/gpu_executor.py index 9019ee7763c77..eb2ee262b6733 100644 --- a/vllm/executor/gpu_executor.py +++ b/vllm/executor/gpu_executor.py @@ -1,4 +1,3 @@ -import importlib from typing import Dict, List, Optional from vllm.lora.request import LoRARequest @@ -13,12 +12,6 @@ logger = init_logger(__name__) -# A map between the device type (in device config) to its worker module. -DEVICE_TO_WORKER_MODULE_MAP = { - "cuda": "vllm.worker.worker", - "neuron": "vllm.worker.neuron_worker", -} - class GPUExecutor(ExecutorBase): @@ -44,17 +37,10 @@ def __init__( # Profile the memory usage and initialize the cache. 
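The executor dispatch above now keys on device_config.device_type before falling back to the Ray check, and the async engine simply refuses Neuron for now. A minimal sketch of that selection order, with hypothetical stub classes standing in for the real executor classes:

# Illustrative stand-ins only; they mirror the selection order used in
# LLMEngine.from_engine_args after this change, not the real executors.
class GPUExecutorStub:
    pass


class RayGPUExecutorStub:
    pass


class NeuronExecutorStub:
    pass


def select_executor(device_type: str, worker_use_ray: bool):
    """Device type is checked first, then Ray, then the single-GPU default."""
    if device_type == "neuron":
        return NeuronExecutorStub
    if worker_use_ray:
        return RayGPUExecutorStub
    return GPUExecutorStub


assert select_executor("neuron", True) is NeuronExecutorStub
assert select_executor("cuda", True) is RayGPUExecutorStub
assert select_executor("cuda", False) is GPUExecutorStub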
self._init_cache() - def _dispatch_worker(self): - worker_module = DEVICE_TO_WORKER_MODULE_MAP[ - self.device_config.device_type] - imported_worker = importlib.import_module(worker_module) - Worker = imported_worker.Worker - return Worker - def _init_worker(self): # Lazy import the Worker to avoid importing torch.cuda/xformers # before CUDA_VISIBLE_DEVICES is set in the Worker - Worker = self._dispatch_worker() + from vllm.worker.worker import Worker assert self.parallel_config.world_size == 1, ( "GPUExecutor only supports single GPU.") @@ -73,7 +59,7 @@ def _init_worker(self): kv_cache_dtype=self.cache_config.cache_dtype, is_driver_worker=True, ) - self.driver_worker.init_model() + self.driver_worker.init_device() self.driver_worker.load_model() def _init_cache(self) -> None: diff --git a/vllm/executor/neuron_executor.py b/vllm/executor/neuron_executor.py new file mode 100644 index 0000000000000..c0ade4767156c --- /dev/null +++ b/vllm/executor/neuron_executor.py @@ -0,0 +1,80 @@ +from typing import Dict, List, Optional + +from vllm.lora.request import LoRARequest +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.executor.executor_base import ExecutorBase +from vllm.logger import init_logger +from vllm.sequence import SamplerOutput, SequenceGroupMetadata + +logger = init_logger(__name__) + + +class NeuronExecutor(ExecutorBase): + + def __init__( + self, + model_config: ModelConfig, + cache_config: CacheConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + lora_config: Optional[LoRAConfig], + ) -> None: + self.model_config = model_config + self.cache_config = cache_config + assert lora_config is None, "LoRA is not supported for Neuron backend." + self.parallel_config = parallel_config + self.scheduler_config = scheduler_config + self.device_config = device_config + + # Set the number of GPU blocks to be the same as the maximum number of + # sequences that can be processed in a single batch. This is equivalent + # to schedule without PagedAttention. + self.cache_config.num_gpu_blocks = self.scheduler_config.max_num_seqs + self.cache_config.num_cpu_blocks = 0 + + # Instantiate the worker and load the model to the device. 
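As the comment above notes, sizing the cache as max_num_seqs GPU blocks and zero CPU blocks amounts to scheduling without PagedAttention: on Neuron the block size must equal the maximum sequence length, so a single block covers a whole sequence. A toy check of that accounting, with made-up numbers:

# Hypothetical settings mirroring the Neuron constraint that one block spans
# a whole sequence, so blocks == schedulable sequences and swapping never
# occurs (hence num_cpu_blocks = 0 and the empty-swap asserts below).
max_model_len = 128
block_size = max_model_len          # required to match on Neuron
max_num_seqs = 8

blocks_per_seq = (max_model_len + block_size - 1) // block_size
num_gpu_blocks = max_num_seqs
num_cpu_blocks = 0

assert blocks_per_seq == 1
assert num_gpu_blocks // blocks_per_seq == max_num_seqs
print(num_gpu_blocks, num_cpu_blocks)  # 8 0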
+ self._init_worker() + + def _init_worker(self): + from vllm.worker.neuron_worker import NeuronWorker + + self.driver_worker = NeuronWorker( + self.model_config, + self.parallel_config, + self.scheduler_config, + self.device_config, + ) + self.driver_worker.init_device() + self.driver_worker.load_model() + + def execute_model(self, + seq_group_metadata_list: List[SequenceGroupMetadata], + blocks_to_swap_in: Dict[int, int], + blocks_to_swap_out: Dict[int, int], + blocks_to_copy: Dict[int, List[int]]) -> SamplerOutput: + assert (blocks_to_swap_in == {} and blocks_to_swap_out == {} + and blocks_to_copy == {}), ( + "Cache operations are not supported for Neuron backend.") + + output = self.driver_worker.execute_model( + seq_group_metadata_list=seq_group_metadata_list) + return output + + def add_lora(self, lora_request: LoRARequest) -> bool: + raise NotImplementedError( + "LoRA is not implemented for neuron backend.") + + def remove_lora(self, lora_id: int) -> bool: + raise NotImplementedError( + "LoRA is not implemented for neuron backend.") + + def list_loras(self) -> List[int]: + raise NotImplementedError( + "LoRA is not implemented for neuron backend.") + + def check_health(self) -> None: + # NeuronExecutor will always be healthy as long as + # it's running. + return diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 82a2b456895e8..1faf5b7d68faf 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -3,7 +3,6 @@ from collections import defaultdict import os import pickle -import importlib from typing import TYPE_CHECKING, Any, Dict, List, Optional from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, @@ -25,12 +24,6 @@ logger = init_logger(__name__) -# A map between the device type (in device config) to its worker module. -DEVICE_TO_WORKER_MODULE_MAP = { - "cuda": "vllm.worker.worker", - "neuron": "vllm.worker.neuron_worker", -} - # If the env var is set, it uses the Ray's compiled DAG API # which optimizes the control plane overhead. # Run vLLM with VLLM_USE_RAY_COMPILED_DAG=1 to enable it. @@ -73,13 +66,6 @@ def __init__( if USE_RAY_COMPILED_DAG: self.forward_dag = self._compiled_ray_dag() - def _dispatch_worker(self): - worker_module = DEVICE_TO_WORKER_MODULE_MAP[ - self.device_config.device_type] - imported_worker = importlib.import_module(worker_module) - Worker = imported_worker.Worker - return Worker - def _init_workers_ray(self, placement_group: "PlacementGroup", **ray_remote_kwargs): if self.parallel_config.tensor_parallel_size == 1: @@ -155,7 +141,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", # Lazy import the Worker to avoid importing torch.cuda/xformers # before CUDA_VISIBLE_DEVICES is set in the Worker - Worker = self._dispatch_worker() + from vllm.worker.worker import Worker model_config = copy.deepcopy(self.model_config) parallel_config = copy.deepcopy(self.parallel_config) @@ -201,7 +187,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", # FIXME(woosuk): We are not properly initializing cupy NCCL when # we have multiple nodes. 
- self._run_workers("init_model", + self._run_workers("init_device", cupy_port=get_open_port() if not model_config.enforce_eager else None) self._run_workers( diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py index f6cd1390d4bce..9975df37b320b 100644 --- a/vllm/lora/layers.py +++ b/vllm/lora/layers.py @@ -799,8 +799,8 @@ def __init__( self.device = device @property - def logits_as_hidden_states(self): - return self.base_layer.logits_as_hidden_states + def logits_as_input(self): + return self.base_layer.logits_as_input @property def vocab_size(self): diff --git a/vllm/lora/lora.py b/vllm/lora/lora.py index fbb228c9582d4..f4b3762a53f13 100644 --- a/vllm/lora/lora.py +++ b/vllm/lora/lora.py @@ -1,7 +1,7 @@ from typing import List, Optional import torch -from vllm.utils import in_wsl +from vllm.utils import is_pin_memory_available class LoRALayerWeights: @@ -64,7 +64,7 @@ def create_dummy_lora_weights( dtype: torch.dtype, device: torch.device, embeddings_tensor_dim: Optional[int] = None) -> "LoRALayerWeights": - pin_memory = str(device) == "cpu" and not in_wsl() + pin_memory = str(device) == "cpu" and is_pin_memory_available() lora_a = torch.zeros([input_dim, rank], dtype=dtype, device=device, diff --git a/vllm/lora/models.py b/vllm/lora/models.py index d1bac7617e1d4..a96b49c236eda 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -11,7 +11,7 @@ from torch import nn from vllm.config import LoRAConfig -from vllm.utils import LRUCache, in_wsl +from vllm.utils import LRUCache, is_pin_memory_available from vllm.lora.layers import (BaseLayerWithLoRA, LoRAMapping, from_layer, from_layer_logits_processor) @@ -143,7 +143,7 @@ def from_lora_tensors( embedding_padding_modules: Optional[List[str]] = None, ) -> "LoRAModel": """Create a LoRAModel from a dictionary of tensors.""" - pin_memory = str(device) == "cpu" and not in_wsl() + pin_memory = str(device) == "cpu" and is_pin_memory_available() loras: Dict[str, LoRALayerWeights] = {} for tensor_name, tensor in tensors.items(): module_name, is_lora_a = parse_fine_tuned_lora_name(tensor_name) diff --git a/vllm/model_executor/__init__.py b/vllm/model_executor/__init__.py index cd6dbde5f54cf..5f3c78360e2d7 100644 --- a/vllm/model_executor/__init__.py +++ b/vllm/model_executor/__init__.py @@ -1,10 +1,9 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.utils import set_random_seed, get_model +from vllm.model_executor.utils import set_random_seed __all__ = [ "InputMetadata", - "get_model", "SamplingMetadata", "set_random_seed", ] diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py index 35245865fb1b1..8fdac06c82dd7 100644 --- a/vllm/model_executor/input_metadata.py +++ b/vllm/model_executor/input_metadata.py @@ -1,8 +1,9 @@ from dataclasses import dataclass, fields -from typing import Optional, List, Any, Dict +from typing import TYPE_CHECKING, Optional, List, Any, Dict import torch -from xformers.ops.fmha.attn_bias import AttentionBias +if TYPE_CHECKING: + from xformers.ops.fmha.attn_bias import AttentionBias @dataclass @@ -82,7 +83,7 @@ def __post_init__(self): # when alibi slopes is used. It is because of the limitation # from xformer API. # will not appear in the __repr__ and __init__ - self.attn_bias: Optional[List[AttentionBias]] = None + self.attn_bias: Optional[List["AttentionBias"]] = None # Cuda graph is only used for decoding now. 
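The TYPE_CHECKING guard introduced above keeps xformers off the runtime import path while type checkers still see the real AttentionBias. The same pattern in isolation, with a hypothetical heavy_module standing in for the optional dependency:

from typing import TYPE_CHECKING, List, Optional

if TYPE_CHECKING:
    # Only evaluated by static type checkers; never executed at runtime, so
    # the heavy dependency is not needed just to import this module.
    from heavy_module import ExpensiveType  # hypothetical module and type


def cache_bias(bias: Optional[List["ExpensiveType"]]) -> int:
    # The annotation is a forward reference (a string), so it is not resolved
    # at runtime and the import above is never triggered.
    return 0 if bias is None else len(bias)


print(cache_bias(None))  # -> 0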
if self.use_cuda_graph: diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py index e9d2a2708c1bb..28e8f6bb7e638 100644 --- a/vllm/model_executor/layers/logits_processor.py +++ b/vllm/model_executor/layers/logits_processor.py @@ -4,8 +4,6 @@ import torch import torch.nn as nn -from vllm.utils import is_neuron - from vllm.model_executor.parallel_utils.communication_op import ( tensor_model_parallel_gather) from vllm.model_executor.sampling_metadata import SamplingMetadata @@ -23,7 +21,8 @@ class LogitsProcessor(nn.Module): def __init__(self, vocab_size: int, org_vocab_size: Optional[int] = None, - scale: Optional[float] = 1.0) -> None: + scale: Optional[float] = 1.0, + logits_as_input: bool = False) -> None: """ Args: scale: A scaling factor to apply to the logits. @@ -31,8 +30,8 @@ def __init__(self, super().__init__() self.scale = scale self.vocab_size = vocab_size - # Transformers-neuronx generate outputs as logits directly. - self.logits_as_hidden_states = is_neuron() + # Whether the input is logits (default is hidden states). + self.logits_as_input = logits_as_input # original vocabulary size (without LoRA). self.org_vocab_size = org_vocab_size or vocab_size @@ -43,7 +42,7 @@ def forward( sampling_metadata: SamplingMetadata, embedding_bias: Optional[torch.Tensor] = None, ) -> torch.Tensor: - if self.logits_as_hidden_states: + if self.logits_as_input: logits = hidden_states else: hidden_states = _prune_hidden_states(hidden_states, diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py index 63e494586efb5..84b2125c0b09c 100644 --- a/vllm/model_executor/layers/sampler.py +++ b/vllm/model_executor/layers/sampler.py @@ -4,13 +4,13 @@ import torch import torch.nn as nn +from vllm.model_executor.layers.ops.sample import sample as sample_triton from vllm.model_executor.sampling_metadata import (SamplingMetadata, SamplingTensors) from vllm.sampling_params import SamplingParams, SamplingType from vllm.sequence import (Logprob, PromptLogprobs, SampleLogprobs, SamplerOutput, SequenceData, SequenceGroupOutput, SequenceOutput) -from vllm.model_executor.layers.ops.sample import (sample as sample_triton) class Sampler(nn.Module): diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 069830c4d7cb5..efadb1c504ca8 100755 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -4,7 +4,7 @@ import torch.nn as nn from vllm.logger import init_logger -from vllm.utils import is_hip, is_neuron +from vllm.utils import is_hip logger = init_logger(__name__) @@ -63,12 +63,6 @@ "Sliding window attention is not yet supported in ROCm's flash attention", } -# Models supported by Neuron. 
-_NEURON_SUPPORTED_MODELS = { - "LlamaForCausalLM": "neuron.llama", - "MistralForCausalLM": "neuron.mistral" -} - class ModelRegistry: @@ -85,15 +79,8 @@ def load_model_cls(model_arch: str) -> Optional[Type[nn.Module]]: logger.warning( f"Model architecture {model_arch} is partially supported " "by ROCm: " + _ROCM_PARTIALLY_SUPPORTED_MODELS[model_arch]) - elif is_neuron(): - if model_arch not in _NEURON_SUPPORTED_MODELS: - raise ValueError( - f"Model architecture {model_arch} is not supported by " - "Neuron for now.") module_name, model_cls_name = _MODELS[model_arch] - if is_neuron(): - module_name = _NEURON_SUPPORTED_MODELS[model_arch] module = importlib.import_module( f"vllm.model_executor.models.{module_name}") return getattr(module, model_cls_name, None) diff --git a/vllm/model_executor/models/neuron/llama.py b/vllm/model_executor/models/neuron/llama.py deleted file mode 100644 index 32c43c4944fac..0000000000000 --- a/vllm/model_executor/models/neuron/llama.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Inference-only LLaMA model compatible with HuggingFace weights.""" -import os -from typing import List, Optional, Tuple - -import torch -from torch import nn -from transformers import LlamaConfig - -from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput - -KVCache = Tuple[torch.Tensor, torch.Tensor] - - -class LlamaForCausalLM(nn.Module): - - def __init__( - self, - config: LlamaConfig, - linear_method=None, - ) -> None: - super().__init__() - self.config = config - self.linear_method = linear_method - self.model = None - self.logits_processor = LogitsProcessor(config.vocab_size) - self.sampler = Sampler() - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> torch.Tensor: - with torch.inference_mode(): - block_size = self.model.context_buckets[-1] - if input_metadata.is_prompt: - seq_ids = input_metadata.slot_mapping[:, 0] // block_size - else: - seq_ids = input_metadata.block_tables - logits = self.model(input_ids, - cache_ids=positions, - start_ids=seq_ids.flatten()) - return logits - - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) - return logits - - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens - - def load_weights(self, - model_name_or_path: str, - cache_dir: Optional[str] = None, - load_format: str = "auto", - revision: Optional[str] = None, - **kwargs): - from transformers_neuronx.llama.model import LlamaForSampling - - split_model_dir = f"{model_name_or_path}-split" - if os.path.isdir(os.path.join(model_name_or_path, - "pytorch_model.bin")): - split_model_dir = model_name_or_path - elif not os.path.exists(f"{model_name_or_path}-split"): - from transformers.models.llama import LlamaForCausalLM - from transformers_neuronx.module import save_pretrained_split - - hf_model = LlamaForCausalLM.from_pretrained(model_name_or_path, - low_cpu_mem_usage=True) - save_pretrained_split(hf_model, f"{model_name_or_path}-split") - - self.model = 
LlamaForSampling.from_pretrained(split_model_dir, - **kwargs) - self.model.to_neuron() diff --git a/vllm/model_executor/models/neuron/mistral.py b/vllm/model_executor/models/neuron/mistral.py deleted file mode 100755 index 24fc0fa0aacab..0000000000000 --- a/vllm/model_executor/models/neuron/mistral.py +++ /dev/null @@ -1,89 +0,0 @@ -"""Inference-only Mistral model compatible with HuggingFace weights.""" -from typing import List, Optional, Tuple - -import torch -from torch import nn -from transformers import MistralConfig - -from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput -import os - -KVCache = Tuple[torch.Tensor, torch.Tensor] - - -class MistralForCausalLM(nn.Module): - - def __init__( - self, - config: MistralConfig, - linear_method=None, - ) -> None: - super().__init__() - self.config = config - self.linear_method = linear_method - self.model = None - self.lm_head = None - self.logits_processor = LogitsProcessor(config.vocab_size) - self.sampler = Sampler() - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> SamplerOutput: - with torch.inference_mode(): - seq_ids = [] - block_size = self.model.context_buckets[-1] - if input_metadata.is_prompt: - seq_ids = input_metadata.slot_mapping[:, 0] // block_size - else: - seq_ids = input_metadata.block_tables - - logits = self.model(input_ids, - cache_ids=positions, - start_ids=seq_ids) - return logits - - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) - return logits - - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens - - def load_weights(self, - model_name_or_path: str, - cache_dir: Optional[str] = None, - load_format: str = "auto", - revision: Optional[str] = None, - **kwargs): - from transformers_neuronx.mistral.model import MistralForSampling - - split_model_dir = f"{model_name_or_path}-split" - if os.path.isdir(os.path.join(model_name_or_path, - "pytorch_model.bin")): - split_model_dir = model_name_or_path - elif not os.path.exists(f"{model_name_or_path}-split"): - from transformers import MistralForCausalLM - from transformers_neuronx.module import save_pretrained_split - - hf_model = MistralForCausalLM.from_pretrained( - model_name_or_path, low_cpu_mem_usage=True) - save_pretrained_split(hf_model, f"{model_name_or_path}-split") - - self.model = MistralForSampling.from_pretrained( - split_model_dir, **kwargs) - self.model.to_neuron() diff --git a/vllm/model_executor/neuron_model_loader.py b/vllm/model_executor/neuron_model_loader.py index c434b270a5562..5ad9040478398 100644 --- a/vllm/model_executor/neuron_model_loader.py +++ b/vllm/model_executor/neuron_model_loader.py @@ -1,12 +1,18 @@ -"""Utilities for selecting and loading models.""" -from typing import Type +"""Utilities for selecting and loading neuron models.""" +import importlib +import os +from typing import Optional, Type import torch import torch.nn as nn +import transformers from transformers import PretrainedConfig -from vllm.config 
import ModelConfig, DeviceConfig -from vllm.model_executor.models import ModelRegistry +from vllm.config import ModelConfig, ParallelConfig, SchedulerConfig +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.sampler import Sampler +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.sequence import SamplerOutput TORCH_DTYPE_TO_NEURON_AMP = { "auto": "f32", @@ -20,31 +26,95 @@ torch.float32: "f32", } +# Models supported by Neuron. +_NEURON_SUPPORTED_MODELS = { + "LlamaForCausalLM": ("transformers_neuronx.llama.model", + "LlamaForSampling", "LlamaForCausalLM"), + "MistralForCausalLM": ("transformers_neuronx.mistral.model", + "MistralForSampling", "MistralForCausalLM") +} + + +class NeuronCasualLM(nn.Module): + + def __init__( + self, + config: PretrainedConfig, + ) -> None: + super().__init__() + self.config = config + self.model = None + self.logits_processor = LogitsProcessor(config.vocab_size, + logits_as_input=True) + self.sampler = Sampler() + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + input_block_ids: torch.Tensor, + ) -> torch.Tensor: + logits = self.model(input_ids, + cache_ids=positions, + start_ids=input_block_ids) + return logits + + def compute_logits(self, hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata) -> torch.Tensor: + logits = self.logits_processor(None, hidden_states, sampling_metadata) + return logits + + def sample( + self, + logits: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[SamplerOutput]: + next_tokens = self.sampler(logits, sampling_metadata) + return next_tokens + + def load_weights(self, model_name_or_path: str, **kwargs): + arch = _get_model_architecture(self.config) + neuronx_module_path, neuronx_model_cls, hf_model_cls = ( + _NEURON_SUPPORTED_MODELS[arch]) + neuronx_module = importlib.import_module(neuronx_module_path) + neuronx_model_cls = getattr(neuronx_module, neuronx_model_cls) + + split_model_dir = f"{model_name_or_path}-split" + if os.path.isdir(os.path.join(model_name_or_path, + "pytorch_model.bin")): + split_model_dir = model_name_or_path + elif not os.path.exists(f"{model_name_or_path}-split"): + hf_model_cls = getattr(transformers, hf_model_cls) + from transformers_neuronx.module import save_pretrained_split + + hf_model = hf_model_cls.from_pretrained(model_name_or_path, + low_cpu_mem_usage=True) + save_pretrained_split(hf_model, f"{model_name_or_path}-split") + + self.model = neuronx_model_cls.from_pretrained(split_model_dir, + **kwargs) + self.model.to_neuron() + def _get_model_architecture(config: PretrainedConfig) -> Type[nn.Module]: architectures = getattr(config, "architectures", []) for arch in architectures: - model_cls = ModelRegistry.load_model_cls(arch) - if model_cls is not None: - return model_cls + if arch in _NEURON_SUPPORTED_MODELS: + return arch raise ValueError( - f"Model architectures {architectures} are not supported for now. " - f"Supported architectures: {ModelRegistry.get_supported_archs()}") + f"Model architectures {architectures} are not supported on Neuron " + f"for now. 
Supported architectures: " + f"{list(_NEURON_SUPPORTED_MODELS.keys())}") -def get_model(model_config: ModelConfig, device_config: DeviceConfig, - **kwargs) -> nn.Module: +def get_neuron_model(model_config: ModelConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig) -> nn.Module: from transformers_neuronx.config import (NeuronConfig, ContinuousBatchingConfig) - parallel_config = kwargs.get("parallel_config") - scheduler_config = kwargs.get("scheduler_config") - - model_class = _get_model_architecture(model_config.hf_config) - linear_method = None - # Create a model instance. - model = model_class(model_config.hf_config, linear_method) + model = NeuronCasualLM(model_config.hf_config) continuous_batching_config = ContinuousBatchingConfig( batch_size_for_shared_caches=scheduler_config.max_num_seqs) @@ -54,10 +124,7 @@ def get_model(model_config: ModelConfig, device_config: DeviceConfig, # Load the weights from the cached or downloaded files. model.load_weights( model_config.model, - model_config.download_dir, - model_config.load_format, - model_config.revision, - tp_degree=parallel_config.neuron_tp_degree, + tp_degree=parallel_config.tensor_parallel_size, amp=TORCH_DTYPE_TO_NEURON_AMP[model_config.dtype], neuron_config=neuron_config, context_length_estimate=[scheduler_config.max_model_len], diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 7d08feb3fee1c..0ea850791cf4b 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -4,11 +4,11 @@ import torch import random -from vllm.sampling_params import SamplingParams, SamplingType -from vllm.sequence import SequenceData -from vllm.utils import in_wsl, is_neuron from vllm.model_executor.layers.ops.sample import ( get_num_triton_sampler_splits) +from vllm.sampling_params import SamplingParams, SamplingType +from vllm.sequence import SequenceData +from vllm.utils import is_pin_memory_available _SAMPLING_EPS = 1e-5 _SEED_0_REPLACEMENT = 3403598558 @@ -213,7 +213,7 @@ def from_lists(cls, temperatures: List[float], top_ps: List[float], dtype: torch.dtype) -> "SamplingTensors": # Note that the performance will be very bad without # pinned memory. 
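The _NEURON_SUPPORTED_MODELS table in the new loader above resolves architectures to transformers-neuronx classes through a lazy importlib lookup, so the dependency is only imported when a supported model is actually requested. The same registry pattern in isolation, with a deliberately fake entry so the sketch runs without transformers-neuronx installed:

import importlib

# (module path, class name) per architecture; "json"/"JSONDecoder" is a fake
# stand-in entry so the lookup can be exercised without extra dependencies.
_SUPPORTED = {
    "DemoForCausalLM": ("json", "JSONDecoder"),
}


def load_model_cls(arch: str):
    if arch not in _SUPPORTED:
        raise ValueError(f"Model architecture {arch} is not supported.")
    module_name, cls_name = _SUPPORTED[arch]
    module = importlib.import_module(module_name)  # imported only on demand
    return getattr(module, cls_name)


print(load_model_cls("DemoForCausalLM"))  # <class 'json.decoder.JSONDecoder'>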
- pin_memory = not in_wsl() and not is_neuron() + pin_memory = is_pin_memory_available() prompt_max_len = max(len(tokens) for tokens in prompt_tokens) prompt_padded_tokens = [ tokens + [vocab_size] * (prompt_max_len - len(tokens)) diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py index 0113e3edf0675..336bc1cd005cf 100644 --- a/vllm/model_executor/utils.py +++ b/vllm/model_executor/utils.py @@ -1,18 +1,10 @@ """Utils for model executor.""" import random -import importlib from typing import Any, Dict, Optional import numpy as np import torch -from vllm.config import DeviceConfig, ModelConfig - -DEVICE_TO_MODEL_LOADER_MAP = { - "cuda": "model_loader", - "neuron": "neuron_model_loader", -} - def set_random_seed(seed: int) -> None: random.seed(seed) @@ -41,12 +33,3 @@ def set_weight_attrs( assert not hasattr( weight, key), (f"Overwriting existing tensor attribute: {key}") setattr(weight, key, value) - - -def get_model(model_config: ModelConfig, device_config: DeviceConfig, - **kwargs) -> torch.nn.Module: - model_loader_module = DEVICE_TO_MODEL_LOADER_MAP[device_config.device_type] - imported_model_loader = importlib.import_module( - f"vllm.model_executor.{model_loader_module}") - get_model_fn = imported_model_loader.get_model - return get_model_fn(model_config, device_config, **kwargs) diff --git a/vllm/spec_decode/metrics.py b/vllm/spec_decode/metrics.py index 65a2a4a63a98f..1d9b00b3e4d38 100644 --- a/vllm/spec_decode/metrics.py +++ b/vllm/spec_decode/metrics.py @@ -2,7 +2,7 @@ from dataclasses import dataclass from vllm.model_executor.layers.rejection_sampler import RejectionSampler from typing import Optional -from vllm.utils import in_wsl +from vllm.utils import is_pin_memory_available import time from typing import Callable @@ -63,7 +63,7 @@ def __init__(self, self._in_flight_copy: Optional[torch.cuda.Event] = None - pin_memory = not in_wsl() + pin_memory = is_pin_memory_available() self._aggregate_num_accepted_tokens = torch.tensor( 0, dtype=torch.long, device="cpu", pin_memory=pin_memory) self._aggregate_num_emitted_tokens = torch.tensor( diff --git a/vllm/spec_decode/multi_step_worker.py b/vllm/spec_decode/multi_step_worker.py index 0915c275b0408..0d9a6f9187cbc 100644 --- a/vllm/spec_decode/multi_step_worker.py +++ b/vllm/spec_decode/multi_step_worker.py @@ -27,8 +27,8 @@ def __init__(self, *args, **kwargs): self._proposer: Optional[DraftModelTop1Proposer] = None - def init_model(self): - super().init_model() + def init_device(self): + super().init_device() self._proposer = DraftModelTop1Proposer( self, diff --git a/vllm/spec_decode/spec_decode_worker.py b/vllm/spec_decode/spec_decode_worker.py index 1e56741347008..87837ad1aa71b 100644 --- a/vllm/spec_decode/spec_decode_worker.py +++ b/vllm/spec_decode/spec_decode_worker.py @@ -79,13 +79,13 @@ def __init__( self.scorer: SpeculativeScorer = None - def init_model(self) -> None: + def init_device(self) -> None: """Initialize both scorer and proposer models. """ # The scorer worker model is initialized first in case the proposer # model has a smaller TP degree than the target worker. 
- self.scorer_worker.init_model() - self.proposer_worker.init_model() + self.scorer_worker.init_device() + self.proposer_worker.init_device() self._metrics.init_gpu_tensors(self.rank) self.rejection_sampler.init_gpu_tensors(self.rank) diff --git a/vllm/utils.py b/vllm/utils.py index 8fa372b5f7f09..13b3621a89638 100644 --- a/vllm/utils.py +++ b/vllm/utils.py @@ -338,7 +338,27 @@ def create_kv_caches_with_random( return key_caches, value_caches -class measure_cuda_memory: +@lru_cache +def print_warning_once(msg: str) -> None: + logger.warning(msg) + + +@lru_cache(maxsize=None) +def is_pin_memory_available() -> bool: + + if in_wsl(): + # Pinning memory in WSL is not supported. + # https://docs.nvidia.com/cuda/wsl-user-guide/index.html#known-limitations-for-linux-cuda-applications + print_warning_once("Using 'pin_memory=False' as WSL is detected. " + "This may slow down the performance.") + return False + elif is_neuron(): + print_warning_once("Pin memory is not supported on Neuron.") + return False + return True + + +class CudaMemoryProfiler: def __init__(self, device=None): self.device = device @@ -360,3 +380,44 @@ def __exit__(self, exc_type, exc_val, exc_tb): # Force garbage collection gc.collect() + + +def pad_to_max_length(x: List[int], max_len: int, pad: int) -> List[int]: + assert len(x) <= max_len + return x + [pad] * (max_len - len(x)) + + +def make_tensor_with_pad( + x: List[List[int]], + max_len: int, + pad: int, + dtype: torch.dtype, + device: Optional[Union[str, torch.device]], +) -> torch.Tensor: + """Make a padded tensor of a 2D inputs. + + The padding is applied to the end of each inner list until it reaches + `max_len`. + """ + padded_x = [pad_to_max_length(x_i, max_len, pad) for x_i in x] + return torch.tensor(padded_x, dtype=dtype, device=device) + + +def async_tensor_h2d( + data: list, + dtype: torch.dtype, + target_device: Union[str, torch.device], + pin_memory: bool, +) -> torch.Tensor: + """Asynchronously create a tensor and copy it from host to device.""" + t = torch.tensor(data, dtype=dtype, pin_memory=pin_memory, device="cpu") + return t.to(device=target_device, non_blocking=True) + + +def maybe_expand_dim(tensor: torch.Tensor, + target_dims: int, + size: int = 1) -> torch.Tensor: + """Expand the tensor to the target_dims.""" + if tensor.ndim < target_dims: + tensor = tensor.view(-1, *([size] * (target_dims - tensor.ndim))) + return tensor diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index 1782fe7e57177..307b7b778cb3f 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -5,7 +5,7 @@ from vllm.config import CacheConfig, ModelConfig, ParallelConfig from vllm.logger import init_logger -from vllm.utils import in_wsl, is_neuron, STR_DTYPE_TO_TORCH_DTYPE +from vllm.utils import is_pin_memory_available, STR_DTYPE_TO_TORCH_DTYPE logger = init_logger(__name__) @@ -38,10 +38,6 @@ def __init__( self.num_gpu_blocks = cache_config.num_gpu_blocks self.num_cpu_blocks = cache_config.num_cpu_blocks - # Skip initializing KV cache for Neuron backend. - if is_neuron(): - return - if cache_config.cache_dtype == "auto": self.dtype = model_config.dtype else: @@ -90,12 +86,7 @@ def allocate_cpu_cache(self) -> List[KVCache]: cpu_cache: List[KVCache] = [] key_block_shape = self.get_key_block_shape() value_block_shape = self.get_value_block_shape() - pin_memory = not in_wsl() - if not pin_memory: - # Pinning memory in WSL is not supported. 
- # https://docs.nvidia.com/cuda/wsl-user-guide/index.html#known-limitations-for-linux-cuda-applications - logger.warning("Using 'pin_memory=False' as WSL is detected. " - "This may slow down the performance.") + pin_memory = is_pin_memory_available() for _ in range(self.num_layers): key_blocks = torch.empty( size=(self.num_cpu_blocks, *key_block_shape), diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 347b9380f1113..b8eeb51379f49 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1,6 +1,6 @@ import contextlib import time -from typing import Dict, List, Optional, Tuple, Set, Union +from typing import Dict, List, Optional, Tuple, Set import numpy as np import torch @@ -9,7 +9,8 @@ from vllm.config import (DeviceConfig, ModelConfig, LoRAConfig, ParallelConfig, SchedulerConfig) from vllm.logger import init_logger -from vllm.model_executor import get_model, InputMetadata, SamplingMetadata +from vllm.model_executor import InputMetadata, SamplingMetadata +from vllm.model_executor.model_loader import get_model from vllm.model_executor.parallel_utils import cupy_utils from vllm.model_executor.parallel_utils.communication_op import ( broadcast_tensor_dict) @@ -21,7 +22,9 @@ from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager from vllm.lora.layers import LoRAMapping from vllm.lora.request import LoRARequest -from vllm.utils import in_wsl, measure_cuda_memory +from vllm.utils import (async_tensor_h2d, CudaMemoryProfiler, + is_pin_memory_available, make_tensor_with_pad, + maybe_expand_dim) logger = init_logger(__name__) @@ -79,16 +82,11 @@ def __init__( # The shape of the cached block table will be # (max batch size to capture, max context len to capture / block size). self.graph_block_tables = None # Set after initial profiling. 
- # cache in_wsl result - self.in_wsl = in_wsl() + self.pin_memory = is_pin_memory_available() self.kv_cache_dtype = kv_cache_dtype - # Set enforce_eager to True for Neuron backend, to avoid capturing graph - if self.device_config.is_neuron: - self.model_config.enforce_eager = True - def load_model(self) -> None: - with measure_cuda_memory() as m: + with CudaMemoryProfiler() as m: self.model = get_model(self.model_config, self.device_config, lora_config=self.lora_config, @@ -238,7 +236,7 @@ def _prepare_prompt( device=self.device) # Prepare prefix block tables max_prompt_block_table_len = max(len(t) for t in prefix_block_tables) - block_tables = _make_tensor_with_pad( + block_tables = make_tensor_with_pad( prefix_block_tables, max_len=max_prompt_block_table_len, pad=0, @@ -395,7 +393,7 @@ def _prepare_decode( else: max_block_table_len = max( len(block_table) for block_table in block_tables) - block_tables = _make_tensor_with_pad( + block_tables = make_tensor_with_pad( block_tables, max_len=max_block_table_len, pad=0, @@ -436,7 +434,6 @@ def _prepare_sample( categorized_sample_indices = {t: [] for t in SamplingType} categorized_sample_indices_start_idx = 0 categorized_sampled_token_indices_start_idx = 0 - pin_memory = not self.in_wsl and not self.device_config.is_neuron for i, seq_group_metadata in enumerate(seq_group_metadata_list): seq_ids = list(seq_group_metadata.seq_data.keys()) @@ -469,7 +466,7 @@ def _prepare_sample( if sampling_params.seed is not None: seq_group_metadata.state.generator = torch.Generator( - device="cuda").manual_seed(sampling_params.seed) + device=self.device).manual_seed(sampling_params.seed) else: num_seqs = len(seq_ids) selected_token_indices.extend( @@ -494,17 +491,17 @@ def _prepare_sample( if sampling_params.seed is not None: generators.append(seq_group_metadata.state.generator) - selected_token_indices = _async_h2d(selected_token_indices, - dtype=torch.long, - target_device=self.device, - pin_memory=not self.in_wsl) + selected_token_indices = async_tensor_h2d(selected_token_indices, + dtype=torch.long, + target_device=self.device, + pin_memory=self.pin_memory) categorized_sample_indices = { - t: _maybe_expand_dim( - _async_h2d(seq_ids, - dtype=torch.int, - target_device=self.device, - pin_memory=pin_memory), 2, 2) + t: maybe_expand_dim( + async_tensor_h2d(seq_ids, + dtype=torch.int, + target_device=self.device, + pin_memory=self.pin_memory), 2, 2) for t, seq_ids in categorized_sample_indices.items() } @@ -910,27 +907,6 @@ def _maybe_cupy_nccl(): yield -def _pad_to_max(x: List[int], max_len: int, pad: int) -> List[int]: - assert len(x) <= max_len - return x + [pad] * (max_len - len(x)) - - -def _make_tensor_with_pad( - x: List[List[int]], - max_len: int, - pad: int, - dtype: torch.dtype, - device: Optional[Union[str, torch.device]], -) -> torch.Tensor: - """Make a padded tensor of a 2D inputs. - - The padding is applied to the end of each inner list until it reaches - `max_len`. - """ - padded_x = [_pad_to_max(x_i, max_len, pad) for x_i in x] - return torch.tensor(padded_x, dtype=dtype, device=device) - - def _get_graph_batch_size(batch_size: int) -> int: """Returns the padded batch size given actual batch size. 
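With the padding and host-to-device helpers promoted to vllm.utils, both the CUDA and Neuron model runners share one implementation. A small usage sketch of make_tensor_with_pad, async_tensor_h2d, and is_pin_memory_available, assuming a vLLM build with this patch applied, and kept on CPU so it does not require a GPU:

import torch

from vllm.utils import (async_tensor_h2d, is_pin_memory_available,
                        make_tensor_with_pad)

# Ragged per-sequence block tables padded into one rectangular tensor.
block_tables = [[3, 7], [1], [4, 5, 9]]
padded = make_tensor_with_pad(block_tables,
                              max_len=3,
                              pad=0,
                              dtype=torch.int,
                              device="cpu")
print(padded)  # rows shorter than max_len are right-padded with 0

# Pinned staging buffers are only requested where supported (not under WSL,
# not on Neuron); the extra CUDA check keeps this snippet runnable on a
# CPU-only machine, where pinning would otherwise fail.
pin = is_pin_memory_available() and torch.cuda.is_available()
indices = async_tensor_h2d([0, 2, 5],
                           dtype=torch.long,
                           target_device="cpu",
                           pin_memory=pin)
print(indices)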
@@ -944,21 +920,3 @@ def _get_graph_batch_size(batch_size: int) -> int: else: return ((batch_size + _BATCH_SIZE_ALIGNMENT - 1) // _BATCH_SIZE_ALIGNMENT * _BATCH_SIZE_ALIGNMENT) - - -def _async_h2d( - data: list, - dtype: torch.dtype, - target_device: Union[str, torch.device], - pin_memory: bool, -) -> torch.Tensor: - t = torch.tensor(data, dtype=dtype, pin_memory=pin_memory, device="cpu") - return t.to(device=target_device, non_blocking=True) - - -def _maybe_expand_dim(tensor: torch.Tensor, - target_dims: int, - size: int = 1) -> torch.Tensor: - if tensor.ndim < target_dims: - tensor = tensor.view(-1, *([size] * (target_dims - tensor.ndim))) - return tensor diff --git a/vllm/worker/neuron_model_runner.py b/vllm/worker/neuron_model_runner.py new file mode 100644 index 0000000000000..ded22b9a3ac0f --- /dev/null +++ b/vllm/worker/neuron_model_runner.py @@ -0,0 +1,287 @@ +from typing import Dict, List, Optional, Tuple + +import torch + +from vllm.config import (DeviceConfig, ModelConfig, ParallelConfig, + SchedulerConfig) +from vllm.logger import init_logger +from vllm.model_executor import SamplingMetadata +from vllm.model_executor.neuron_model_loader import get_neuron_model +from vllm.sampling_params import SamplingParams, SamplingType +from vllm.sequence import SamplerOutput, SequenceData, SequenceGroupMetadata +from vllm.utils import (async_tensor_h2d, is_pin_memory_available, + make_tensor_with_pad, maybe_expand_dim) + +logger = init_logger(__name__) + +KVCache = Tuple[torch.Tensor, torch.Tensor] + + +class NeuronModelRunner: + + def __init__( + self, + model_config: ModelConfig, + parallel_config: ParallelConfig, + scheduler_config: SchedulerConfig, + device_config: DeviceConfig, + ): + self.model_config = model_config + self.parallel_config = parallel_config + self.scheduler_config = scheduler_config + + if model_config is not None and model_config.get_sliding_window(): + logger.warning("Sliding window is not supported on Neuron. 
" + "The model will run without sliding window.") + self.device_config = (device_config + if device_config is not None else DeviceConfig()) + self.device = self.device_config.device + self.model = None + self.pin_memory = is_pin_memory_available() + + def load_model(self) -> None: + self.model = get_neuron_model(self.model_config, + parallel_config=self.parallel_config, + scheduler_config=self.scheduler_config) + + def _prepare_prompt( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, List[int]]: + assert len(seq_group_metadata_list) > 0 + input_tokens: List[List[int]] = [] + input_positions: List[List[int]] = [] + input_block_ids: List[int] = [] + + prompt_lens: List[int] = [] + for seq_group_metadata in seq_group_metadata_list: + assert seq_group_metadata.is_prompt + seq_ids = list(seq_group_metadata.seq_data.keys()) + assert len(seq_ids) == 1 + seq_id = seq_ids[0] + + seq_data = seq_group_metadata.seq_data[seq_id] + prompt_tokens = seq_data.get_token_ids() + prompt_len = len(prompt_tokens) + prompt_lens.append(prompt_len) + + input_tokens.append(prompt_tokens) + input_positions.append(list(range(prompt_len))) + + assert seq_group_metadata.block_tables is not None + block_table = seq_group_metadata.block_tables[seq_id] + assert len(block_table) == 1 + input_block_ids.append(block_table[0]) + + max_prompt_len = max(prompt_lens) + assert max_prompt_len > 0 + input_tokens = make_tensor_with_pad(input_tokens, + max_prompt_len, + pad=0, + dtype=torch.long, + device=self.device) + input_positions = make_tensor_with_pad(input_positions, + max_prompt_len, + pad=0, + dtype=torch.long, + device=self.device) + input_block_ids = torch.tensor(input_block_ids, + dtype=torch.long, + device=self.device) + + return input_tokens, input_positions, input_block_ids, prompt_lens + + def _prepare_decode( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + assert len(seq_group_metadata_list) > 0 + input_tokens: List[List[int]] = [] + input_positions: List[List[int]] = [] + input_block_ids: List[int] = [] + context_lens: List[int] = [] + + for seq_group_metadata in seq_group_metadata_list: + assert not seq_group_metadata.is_prompt + + seq_ids = list(seq_group_metadata.seq_data.keys()) + + for seq_id in seq_ids: + seq_data = seq_group_metadata.seq_data[seq_id] + generation_token = seq_data.get_last_token_id() + input_tokens.append([generation_token]) + + seq_len = seq_data.get_len() + position = seq_len - 1 + input_positions.append([position]) + context_lens.append(seq_len) + + assert seq_group_metadata.block_tables is not None + block_table = seq_group_metadata.block_tables[seq_id] + assert len(block_table) == 1 + input_block_ids.append(block_table[0]) + + input_tokens = make_tensor_with_pad(input_tokens, + max_len=1, + pad=0, + dtype=torch.long, + device=self.device) + input_positions = make_tensor_with_pad(input_positions, + max_len=1, + pad=0, + dtype=torch.long, + device=self.device) + context_lens = torch.tensor(context_lens, + dtype=torch.int, + device=self.device) + input_block_ids = torch.tensor(input_block_ids, + dtype=torch.long, + device=self.device) + + return input_tokens, input_positions, input_block_ids + + def _prepare_sample( + self, + seq_group_metadata_list: List[SequenceGroupMetadata], + prompt_lens: List[int], + ) -> SamplingMetadata: + seq_groups: List[Tuple[List[int], SamplingParams]] = [] + selected_token_indices: List[int] = [] + generators: 
List[torch.Generator] = [] + selected_token_start_idx = 0 + categorized_sample_indices = {t: [] for t in SamplingType} + categorized_sample_indices_start_idx = 0 + categorized_sampled_token_indices_start_idx = 0 + + for i, seq_group_metadata in enumerate(seq_group_metadata_list): + seq_ids = list(seq_group_metadata.seq_data.keys()) + sampling_params = seq_group_metadata.sampling_params + seq_groups.append((seq_ids, sampling_params)) + + if seq_group_metadata.is_prompt: + assert len(seq_ids) == 1 + assert prompt_lens is not None + prompt_len = prompt_lens[i] + if sampling_params.prompt_logprobs is not None: + # NOTE: prompt token positions do not need sample, skip + categorized_sample_indices_start_idx += prompt_len - 1 + + categorized_sample_indices[ + sampling_params.sampling_type].append([ + categorized_sample_indices_start_idx, + categorized_sampled_token_indices_start_idx + ]) + categorized_sample_indices_start_idx += 1 + categorized_sampled_token_indices_start_idx += 1 + + if sampling_params.prompt_logprobs is not None: + selected_token_indices.extend( + range(selected_token_start_idx, + selected_token_start_idx + prompt_len - 1)) + selected_token_indices.append(selected_token_start_idx + + prompt_len - 1) + selected_token_start_idx += prompt_len + + if sampling_params.seed is not None: + seq_group_metadata.state.generator = torch.Generator( + device=self.device).manual_seed(sampling_params.seed) + else: + num_seqs = len(seq_ids) + selected_token_indices.extend( + range(selected_token_start_idx, + selected_token_start_idx + num_seqs)) + selected_token_start_idx += num_seqs + + categorized_sample_indices[ + sampling_params.sampling_type].extend( + zip( + range( + categorized_sample_indices_start_idx, + categorized_sample_indices_start_idx + + num_seqs), + range( + categorized_sampled_token_indices_start_idx, + categorized_sampled_token_indices_start_idx + + num_seqs))) + categorized_sample_indices_start_idx += num_seqs + categorized_sampled_token_indices_start_idx += num_seqs + + if sampling_params.seed is not None: + generators.append(seq_group_metadata.state.generator) + + selected_token_indices = async_tensor_h2d(selected_token_indices, + dtype=torch.long, + target_device=self.device, + pin_memory=self.pin_memory) + + categorized_sample_indices = { + t: maybe_expand_dim( + async_tensor_h2d(seq_ids, + dtype=torch.int, + target_device=self.device, + pin_memory=self.pin_memory), 2, 2) + for t, seq_ids in categorized_sample_indices.items() + } + + seq_data: Dict[int, SequenceData] = {} + for seq_group_metadata in seq_group_metadata_list: + seq_data.update(seq_group_metadata.seq_data) + + sampling_metadata = SamplingMetadata( + seq_groups=seq_groups, + seq_data=seq_data, + prompt_lens=prompt_lens, + selected_token_indices=selected_token_indices, + categorized_sample_indices=categorized_sample_indices, + generators=generators, + ) + return sampling_metadata + + def prepare_input_tensors( + self, + seq_group_metadata_list: Optional[List[SequenceGroupMetadata]], + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, SamplingMetadata]: + # NOTE: We assume that all sequences in the group are all prompts or + # all decodes. + is_prompt = seq_group_metadata_list[0].is_prompt + # Prepare input tensors. 
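For the prompt path being prepared here, every sequence owns exactly one block id while tokens and positions are right-padded to the longest prompt in the batch. A torch-free toy batch showing the resulting layout (the numbers are made up):

# Made-up batch of three prompts; on Neuron each sequence maps to a single
# block, so the block-id list is 1-D while tokens/positions are padded 2-D.
prompts = [[11, 12, 13, 14], [21, 22], [31, 32, 33]]
input_block_ids = [0, 1, 2]

max_prompt_len = max(len(p) for p in prompts)
input_tokens = [p + [0] * (max_prompt_len - len(p)) for p in prompts]
input_positions = [
    list(range(len(p))) + [0] * (max_prompt_len - len(p)) for p in prompts
]

assert all(len(row) == max_prompt_len for row in input_tokens)
print(input_tokens)     # [[11, 12, 13, 14], [21, 22, 0, 0], [31, 32, 33, 0]]
print(input_positions)  # [[0, 1, 2, 3], [0, 1, 0, 0], [0, 1, 2, 0]]
print(input_block_ids)  # [0, 1, 2]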
+ if is_prompt: + (input_tokens, input_positions, input_block_ids, + prompt_lens) = self._prepare_prompt(seq_group_metadata_list) + else: + (input_tokens, input_positions, + input_block_ids) = self._prepare_decode(seq_group_metadata_list) + prompt_lens = [] + sampling_metadata = self._prepare_sample(seq_group_metadata_list, + prompt_lens) + + return (input_tokens, input_positions, input_block_ids, + sampling_metadata) + + @torch.inference_mode() + def execute_model( + self, + seq_group_metadata_list: Optional[List[SequenceGroupMetadata]], + ) -> Optional[SamplerOutput]: + (input_tokens, input_positions, input_block_ids, sampling_metadata + ) = self.prepare_input_tensors(seq_group_metadata_list) + + hidden_states = self.model( + input_ids=input_tokens, + positions=input_positions, + input_block_ids=input_block_ids, + ) + + # Compute the logits. + logits = self.model.compute_logits(hidden_states, sampling_metadata) + + # Sample the next token. + output = self.model.sample( + logits=logits, + sampling_metadata=sampling_metadata, + ) + return output + + @property + def vocab_size(self) -> int: + return self.model_config.get_vocab_size() diff --git a/vllm/worker/neuron_worker.py b/vllm/worker/neuron_worker.py index 340c079600c78..0ae067aafb29b 100644 --- a/vllm/worker/neuron_worker.py +++ b/vllm/worker/neuron_worker.py @@ -1,22 +1,17 @@ """A Neuron worker class.""" -from typing import Dict, List, Optional, Tuple +from typing import List, Optional import torch import torch.distributed -from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, - ParallelConfig, SchedulerConfig, LoRAConfig) +from vllm.config import (DeviceConfig, ModelConfig, ParallelConfig, + SchedulerConfig) from vllm.model_executor import set_random_seed -from vllm.model_executor.parallel_utils.communication_op import ( - broadcast_tensor_dict) -from vllm.model_executor.parallel_utils.parallel_state import ( - ensure_model_parallel_initialized) from vllm.sequence import SamplerOutput, SequenceGroupMetadata -from vllm.worker.cache_engine import CacheEngine -from vllm.worker.model_runner import ModelRunner +from vllm.worker.neuron_model_runner import NeuronModelRunner -class Worker: +class NeuronWorker: """A worker class that executes the model on a group of neuron cores. """ @@ -26,168 +21,32 @@ def __init__( parallel_config: ParallelConfig, scheduler_config: SchedulerConfig, device_config: DeviceConfig, - local_rank: int, - rank: int, - distributed_init_method: str, - lora_config: Optional[LoRAConfig] = None, - kv_cache_dtype: Optional[str] = "auto", - is_driver_worker: bool = False, ) -> None: self.model_config = model_config self.parallel_config = parallel_config self.scheduler_config = scheduler_config self.device_config = device_config - self.local_rank = local_rank - self.rank = rank - self.distributed_init_method = distributed_init_method - self.lora_config = lora_config - self.is_driver_worker = is_driver_worker - if self.is_driver_worker: - assert self.rank == 0, "The driver worker must have rank 0." - self.model_runner = ModelRunner(model_config, - parallel_config, - scheduler_config, - device_config, - lora_config=self.lora_config, - is_driver_worker=is_driver_worker) - # Uninitialized cache engine. Will be initialized by - # self.init_cache_engine(). 
- self.cache_config = None - self.cache_engine = None - self.cache_events = None - self.gpu_cache = None + self.model_runner = NeuronModelRunner(model_config, parallel_config, + scheduler_config, device_config) - def init_model(self) -> None: - # Initialize the distributed environment. - _init_distributed_environment(self.parallel_config, - self.rank, - self.distributed_init_method, - distributed_backend="gloo") - - # Initialize the model. + def init_device(self) -> None: + # Set random seed. set_random_seed(self.model_config.seed) def load_model(self): self.model_runner.load_model() - @torch.inference_mode() - def profile_num_available_blocks( - self, - block_size: int = 128, - gpu_memory_utilization: float = 0.9, - cpu_swap_space: int = 0, - cache_dtype: str = "float16", - ) -> Tuple[int, int]: - """Simply returns max_num_seqs as num_gpu_blocks, 0 as - num_cpu_blocks.""" - num_gpu_blocks = self.scheduler_config.max_num_seqs - num_cpu_blocks = 0 - return num_gpu_blocks, num_cpu_blocks - - def init_cache_engine(self, cache_config: CacheConfig) -> None: - self.cache_config = cache_config - self.cache_engine = CacheEngine(self.cache_config, self.model_config, - self.parallel_config) - self.model_runner.set_block_size(self.cache_engine.block_size) - - def warm_up_model(self) -> None: - # Warm up is maintained in transformers-neuronx - pass - - def cache_swap( - self, - blocks_to_swap_in: Dict[int, int], - blocks_to_swap_out: Dict[int, int], - blocks_to_copy: Dict[int, List[int]], - ) -> None: - # Issue cache operations. - issued_cache_op = False - if blocks_to_swap_in: - self.cache_engine.swap_in(blocks_to_swap_in) - issued_cache_op = True - if blocks_to_swap_out: - self.cache_engine.swap_out(blocks_to_swap_out) - issued_cache_op = True - if blocks_to_copy: - self.cache_engine.copy(blocks_to_copy) - issued_cache_op = True - - cache_events = self.cache_events if issued_cache_op else None - - # Wait for cache operations to finish. - if cache_events is not None: - raise NotImplementedError( - "cache operations are not implemented for neuron backend.") - @torch.inference_mode() def execute_model( self, - seq_group_metadata_list: Optional[List[SequenceGroupMetadata]] = None, - blocks_to_swap_in: Optional[Dict[int, int]] = None, - blocks_to_swap_out: Optional[Dict[int, int]] = None, - blocks_to_copy: Optional[Dict[int, List[int]]] = None, + seq_group_metadata_list: List[SequenceGroupMetadata], ) -> Optional[SamplerOutput]: - if self.is_driver_worker: - assert seq_group_metadata_list is not None - num_seq_groups = len(seq_group_metadata_list) - assert blocks_to_swap_in is not None - assert blocks_to_swap_out is not None - assert blocks_to_copy is not None - data = { - "num_seq_groups": num_seq_groups, - "blocks_to_swap_in": blocks_to_swap_in, - "blocks_to_swap_out": blocks_to_swap_out, - "blocks_to_copy": blocks_to_copy, - } - broadcast_tensor_dict(data, src=0) - else: - data = broadcast_tensor_dict(src=0) - num_seq_groups = data["num_seq_groups"] - blocks_to_swap_in = data["blocks_to_swap_in"] - blocks_to_swap_out = data["blocks_to_swap_out"] - blocks_to_copy = data["blocks_to_copy"] - - self.cache_swap(blocks_to_swap_in, blocks_to_swap_out, blocks_to_copy) + num_seq_groups = len(seq_group_metadata_list) # If there is no input, we don't need to execute the model. 
if num_seq_groups == 0: return {} - output = self.model_runner.execute_model(seq_group_metadata_list, - self.gpu_cache) + output = self.model_runner.execute_model(seq_group_metadata_list) return output - - -def _init_distributed_environment( - parallel_config: ParallelConfig, - rank: int, - distributed_init_method: Optional[str] = None, - distributed_backend: Optional[str] = None, -) -> None: - """Initialize the distributed environment.""" - if torch.distributed.is_initialized(): - torch_world_size = torch.distributed.get_world_size() - if torch_world_size != parallel_config.world_size: - raise RuntimeError( - "torch.distributed is already initialized but the torch world " - "size does not match parallel_config.world_size " - f"({torch_world_size} vs. {parallel_config.world_size}).") - elif not distributed_init_method: - raise ValueError( - "distributed_init_method must be set if torch.distributed " - "is not already initialized") - else: - distributed_backend = (distributed_backend - if distributed_backend else "nccl") - torch.distributed.init_process_group( - backend=distributed_backend, - world_size=parallel_config.world_size, - rank=rank, - init_method=distributed_init_method, - ) - - # A small all_reduce for warmup. - torch.distributed.all_reduce(torch.zeros(1)) - ensure_model_parallel_initialized(parallel_config.tensor_parallel_size, - parallel_config.pipeline_parallel_size) diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 81beb5ce4d8d4..d8999dc172127 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -67,7 +67,7 @@ def __init__( self.cache_engine = None self.gpu_cache = None - def init_model(self, cupy_port: Optional[int] = None) -> None: + def init_device(self, cupy_port: Optional[int] = None) -> None: if self.device_config.device.type == "cuda": # torch.distributed.all_reduce does not free the input tensor until # the synchronization point. This causes the memory usage to grow @@ -91,7 +91,7 @@ def init_model(self, cupy_port: Optional[int] = None) -> None: # Initialize the distributed environment. init_distributed_environment(self.parallel_config, self.rank, cupy_port, self.distributed_init_method) - # Initialize the model. + # Set random seed. 
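Across both backends the executor now drives its worker through the same narrow interface: init_device(), then load_model(), then repeated execute_model(...) calls. A stub satisfying that minimal protocol (the class and attribute names here are illustrative only, not part of vLLM):

from typing import List, Optional


class StubWorker:
    """Minimal object satisfying the executor-facing worker protocol."""

    def __init__(self) -> None:
        self.device_ready = False
        self.model_loaded = False

    def init_device(self) -> None:
        # Real workers set up the device/distributed state and seed RNGs here.
        self.device_ready = True

    def load_model(self) -> None:
        assert self.device_ready, "init_device() must run before load_model()"
        self.model_loaded = True

    def execute_model(self, seq_group_metadata_list: Optional[List] = None):
        assert self.model_loaded
        return []  # a real worker returns a SamplerOutput


worker = StubWorker()
worker.init_device()
worker.load_model()
print(worker.execute_model([]))  # -> []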
set_random_seed(self.model_config.seed) def load_model(self): From f721096d48a7e3b98dffcb9b400bf58989cef64d Mon Sep 17 00:00:00 2001 From: Hanzhi Zhou Date: Thu, 21 Mar 2024 23:02:58 -0700 Subject: [PATCH 162/196] [BugFix] Some fixes for custom allreduce kernels (#2760) --- csrc/custom_all_reduce.cu | 10 +- csrc/custom_all_reduce.cuh | 227 ++++++------------ csrc/custom_all_reduce_test.cu | 184 ++++++++------ vllm/config.py | 9 - vllm/entrypoints/llm.py | 2 +- .../parallel_utils/custom_all_reduce.py | 50 +++- 6 files changed, 232 insertions(+), 250 deletions(-) diff --git a/csrc/custom_all_reduce.cu b/csrc/custom_all_reduce.cu index 88e4af9d4a99f..3906dcfc80dbf 100644 --- a/csrc/custom_all_reduce.cu +++ b/csrc/custom_all_reduce.cu @@ -29,7 +29,7 @@ fptr_t init_custom_ar(torch::Tensor &meta, torch::Tensor &rank_data, std::memcpy(&ipc_handles[i], handles[i].data(), sizeof(cudaIpcMemHandle_t)); } return (fptr_t) new vllm::CustomAllreduce( - reinterpret_cast(meta.data_ptr()), rank_data.data_ptr(), + reinterpret_cast(meta.data_ptr()), rank_data.data_ptr(), rank_data.numel(), ipc_handles, offsets, rank, full_nvlink); } @@ -62,9 +62,9 @@ bool should_custom_ar(torch::Tensor &inp, int max_size, int world_size, if (inp_size % 16 != 0) return false; if (!_is_weak_contiguous(inp)) return false; if (world_size == 2 || full_nvlink) return inp_size <= max_size; - // 4 PCIE GPUs use 2 stage allreduce, and is only faster than NCCL when size - // <= 512k - return world_size <= 4 && inp_size <= 512 * 1024; + // for 4 or more non NVLink-capable GPUs, custom allreduce provides little + // performance improvement over NCCL. + return false; } void _all_reduce(fptr_t _fa, torch::Tensor &inp, torch::Tensor &out, @@ -126,7 +126,7 @@ void dispose(fptr_t _fa) { delete fa; } -int meta_size() { return sizeof(vllm::Metadata); } +int meta_size() { return sizeof(vllm::Signal); } void register_buffer(fptr_t _fa, torch::Tensor &t, const std::vector &handles, diff --git a/csrc/custom_all_reduce.cuh b/csrc/custom_all_reduce.cuh index 54409e19eb455..750e68d42f6c6 100644 --- a/csrc/custom_all_reduce.cuh +++ b/csrc/custom_all_reduce.cuh @@ -23,29 +23,17 @@ namespace vllm { +constexpr int kMaxBlocks = 64; +// note: we don't want to use atomics for signals because peer atomics are no +// supported on PCIe links struct Signal { - alignas(64) union { - uint64_t flag; - unsigned char data[8]; - } start; - alignas(64) union { - uint64_t flag; - unsigned char data[8]; - } end; + alignas(128) uint32_t start[kMaxBlocks][8]; + alignas(128) uint32_t end[kMaxBlocks][8]; }; -struct Metadata { - alignas(128) Signal sg; - alignas(128) int counter; -}; -static_assert(offsetof(Metadata, counter) == 128); -static_assert(sizeof(Metadata) == 256); - struct __align__(16) RankData { const void *__restrict__ ptrs[8]; }; -struct RankSignals { - volatile Signal *signals[8]; -}; +struct __align__(16) RankSignals { volatile Signal *signals[8]; }; // like std::array, but aligned template @@ -135,70 +123,49 @@ DINLINE O downcast(array_t val) { } } -// compute flag at compile time -__host__ __device__ constexpr uint64_t compute_flag(int ngpus) { - auto m = std::numeric_limits::max(); - return m >> ((8 - ngpus) * 8); -} - +// This function is meant to be used as the first synchronization in the all +// reduce kernel. Thus, it doesn't need to make any visibility guarantees for +// prior memory accesses. Note: volatile writes will not be reordered against +// other volatile writes. 
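The should_custom_ar change earlier in this patch narrows when the custom kernel is eligible: the input must be a 16-byte multiple and weakly contiguous, and beyond two ranks it now also requires full NVLink. A plain-Python restatement of that predicate, useful for reasoning about which calls take the custom path (the function name is illustrative):

def use_custom_allreduce(inp_size_bytes: int, world_size: int,
                         full_nvlink: bool, max_size: int,
                         weak_contiguous: bool = True) -> bool:
    # Mirrors the C++ checks: 16-byte alignment and contiguity come first.
    if inp_size_bytes % 16 != 0 or not weak_contiguous:
        return False
    # Two ranks, or any rank count with full NVLink, use the custom kernel
    # up to the registered buffer size.
    if world_size == 2 or full_nvlink:
        return inp_size_bytes <= max_size
    # Four or more ranks over PCIe: fall back to NCCL (little benefit here).
    return False


print(use_custom_allreduce(8 << 20, world_size=2, full_nvlink=False,
                           max_size=16 << 20))  # True
print(use_custom_allreduce(8 << 20, world_size=4, full_nvlink=False,
                           max_size=16 << 20))  # False after this change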
template -DINLINE void start_sync(const RankSignals &sg, volatile Metadata *meta, +DINLINE void start_sync(const RankSignals &sg, volatile Signal *self_sg, int rank) { - constexpr auto FLAG = compute_flag(ngpus); - if (blockIdx.x == 0) { - if (threadIdx.x < ngpus) - // simultaneously write to the corresponding byte to all other ranks. - // Latency = 1 p2p write - sg.signals[threadIdx.x]->start.data[rank] = 255; - else if (threadIdx.x == 32) - // reset - meta->sg.end.flag = 0; - } - if (threadIdx.x == 0) { - while (meta->sg.start.flag != FLAG) + if (threadIdx.x < ngpus) { + // reset flag for next time + self_sg->end[blockIdx.x][threadIdx.x] = 0; + // simultaneously write to the corresponding flag of all ranks. + // Latency = 1 p2p write + sg.signals[threadIdx.x]->start[blockIdx.x][rank] = 1; + // wait until we got true from all ranks + while (!self_sg->start[blockIdx.x][threadIdx.x]) ; } __syncthreads(); } +// This function is meant to be used as the second or the final synchronization +// barrier in the all reduce kernel. If it's the final synchronization barrier, +// we don't need to make any visibility guarantees for prior memory accesses. template -DINLINE void end_sync(const RankSignals &sg, volatile Metadata *meta, +DINLINE void end_sync(const RankSignals &sg, volatile Signal *self_sg, int rank) { - constexpr auto FLAG = compute_flag(ngpus); __syncthreads(); - __shared__ int num; - if (threadIdx.x == 0) num = atomicAdd((int *)&meta->counter, 1); - __syncthreads(); - - // Only the last completing block can perform the end synchronization - // This can ensures when the final busy wait ends, all ranks must have - // finished reading each other's buffer. - if (num == gridDim.x - 1) { - if (threadIdx.x == 32) { - // reset in a different warp - meta->counter = 0; - meta->sg.start.flag = 0; - } else if (threadIdx.x < ngpus) { - // simultaneously write to the corresponding byte to all other ranks. - // Latency = 1 p2p write - sg.signals[threadIdx.x]->end.data[rank] = 255; - } - // if this is the final sync, only one block needs it - // because kernel exit can serve as sync - if constexpr (final_sync) { - if (threadIdx.x == 0) { - while (meta->sg.end.flag != FLAG) - ; - } - } - } - if constexpr (!final_sync) { - if (threadIdx.x == 0) { - while (meta->sg.end.flag != FLAG) - ; - } - __syncthreads(); + // eliminate the case that prior writes are not visible after signals become + // visible. Note that I did not managed to make this happen through a lot of + // testing. Might be the case that hardware provides stronger guarantee than + // the memory model. + if constexpr (!final_sync) __threadfence_system(); + if (threadIdx.x < ngpus) { + // reset flag for next time + self_sg->start[blockIdx.x][threadIdx.x] = 0; + // simultaneously write to the corresponding flag of all ranks. 
+ // Latency = 1 p2p write + sg.signals[threadIdx.x]->end[blockIdx.x][rank] = 1; + // wait until we got true from all ranks + while (!self_sg->end[blockIdx.x][threadIdx.x]) + ; } + if constexpr (!final_sync) __syncthreads(); } template @@ -214,32 +181,32 @@ DINLINE P packed_reduce(const P *ptrs[], int idx) { template __global__ void __launch_bounds__(512, 1) cross_device_reduce_1stage(RankData *_dp, RankSignals sg, - volatile Metadata *meta, T *__restrict__ result, + volatile Signal *self_sg, T *__restrict__ result, int rank, int size) { using P = typename packed_t::P; using A = typename packed_t::A; // note: we don't reorder the address so the accumulation order is the same // for all ranks, ensuring bitwise identical results auto dp = *_dp; - start_sync(sg, meta, rank); + start_sync(sg, self_sg, rank); // do the actual reduction for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += gridDim.x * blockDim.x) { ((P *)result)[idx] = packed_reduce((const P **)&dp.ptrs[0], idx); } - end_sync(sg, meta, rank); + end_sync(sg, self_sg, rank); } template DINLINE P *get_tmp_buf(volatile Signal *sg) { - return (P *)(((Metadata *)sg) + 1); + return (P *)(((Signal *)sg) + 1); } template __global__ void __launch_bounds__(512, 1) cross_device_reduce_2stage(RankData *_dp, RankSignals sg, - volatile Metadata *meta, T *__restrict__ result, + volatile Signal *self_sg, T *__restrict__ result, int rank, int size) { int tid = blockIdx.x * blockDim.x + threadIdx.x; int stride = gridDim.x * blockDim.x; @@ -248,6 +215,7 @@ __global__ void __launch_bounds__(512, 1) int part = size / ngpus; int start = rank * part; int end = rank == ngpus - 1 ? size : start + part; + int largest_part = part + size % ngpus; const P *ptrs[ngpus]; P *tmps[ngpus]; #pragma unroll @@ -257,75 +225,28 @@ __global__ void __launch_bounds__(512, 1) tmps[i] = get_tmp_buf
<P>
(sg.signals[target]); } auto tmp_out = tmps[0]; - start_sync(sg, meta, rank); + start_sync(sg, self_sg, rank); // stage 1: reduce scatter for (int idx = start + tid; idx < end; idx += stride) { tmp_out[idx - start] = packed_reduce(ptrs, idx); } - // Maybe TODO: replace this with per-block release-acquire - // can save about 1-2us (not a lot though) - end_sync(sg, meta, rank); - - // stage 2: allgather - for (int idx = tid; idx < part; idx += stride) { + end_sync(sg, self_sg, rank); + + // stage 2: allgather. Note: it's important to match the tid between + // the two stages, because visibility across devices is only guaranteed + // between threads that have the same tid. If thread i computes the sum of + // start + i in the first stage, then thread i also gathers start + i from all + // ranks. + for (int idx = tid; idx < largest_part; idx += stride) { #pragma unroll for (int i = 0; i < ngpus; i++) { - int dst_idx = ((rank + i) % ngpus) * part + idx; - ((P *)result)[dst_idx] = tmps[i][idx]; - } - } - // process the last larger partition - int remaining = size - part * ngpus; - if (tid < remaining) { - int dst_idx = tid + part * ngpus; - ((P *)result)[dst_idx] = get_tmp_buf
<P>
(sg.signals[ngpus - 1])[part + tid]; - } - - // faster than this - // for (int idx = tid; idx < size; idx += stride) { - // int target_rank = idx / part; - // if (target_rank == ngpus) target_rank -= 1; - // ((P *)result)[idx] = tmps[target_rank][idx - target_rank * part]; - // } -} - -template -__global__ void __launch_bounds__(512, 1) - cross_device_reduce_half_butterfly(RankData *_dp, RankSignals sg, - volatile Metadata *meta, - T *__restrict__ result, int rank, - int size) { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = gridDim.x * blockDim.x; - using P = typename packed_t::P; - using A = typename packed_t::A; - auto tmp_out = get_tmp_buf
<P>
(sg.signals[rank]); - constexpr int hg = ngpus / 2; - // Actually not quite half butterfly. - // This is an all-to-all within each group containing half of the ranks - // followed by cross-group add. Equivalent to half butterfly when there - // are 4 GPUs, a common case for PCIe cards like T4 and A10. - const P *ptrs[hg]; - { - int start = rank - rank % hg; -#pragma unroll - for (int i = 0; i < hg; i++) { - ptrs[i] = (const P *)_dp->ptrs[i + start]; + int gather_from_rank = ((rank + i) % ngpus); + if (gather_from_rank == ngpus - 1 || idx < part) { + int dst_idx = gather_from_rank * part + idx; + ((P *)result)[dst_idx] = tmps[i][idx]; + } } } - start_sync(sg, meta, rank); - for (int idx = tid; idx < size; idx += stride) { - tmp_out[idx] = packed_reduce(ptrs, idx); - } - end_sync(sg, meta, rank); - - auto src = get_tmp_buf
<P>
(sg.signals[(ngpus - 1) - rank % ngpus]); - // do the cross group reduction - for (int idx = tid; idx < size; idx += stride) { - auto tmp = tmp_out[idx]; - packed_assign_add(tmp, src[idx]); - ((P *)result)[idx] = tmp; - } } using IPC_KEY = std::array; @@ -341,7 +262,7 @@ class CustomAllreduce { // below are device pointers RankSignals sg_; std::unordered_map buffers_; - Metadata *meta_; + Signal *self_sg_; // stores the registered device pointers from all ranks RankData *d_rank_data_base_, *d_rank_data_end_; @@ -352,32 +273,32 @@ class CustomAllreduce { /** * meta is a pointer to device metadata and temporary buffer for allreduce. * - * There's a total of sizeof(Metadata) of prefix before the actual data, + * There's a total of sizeof(Signal) of prefix before the actual data, * so meta + 1 points to actual temporary buffer. * * note: this class does not own any device memory. Any required buffers * are passed in from the constructor */ - CustomAllreduce(Metadata *meta, void *rank_data, size_t rank_data_sz, + CustomAllreduce(Signal *meta, void *rank_data, size_t rank_data_sz, const cudaIpcMemHandle_t *handles, const std::vector &offsets, int rank, bool full_nvlink = true) : rank_(rank), world_size_(offsets.size()), full_nvlink_(full_nvlink), - meta_(meta), + self_sg_(meta), d_rank_data_base_(reinterpret_cast(rank_data)), d_rank_data_end_(d_rank_data_base_ + rank_data_sz / sizeof(RankData)) { for (int i = 0; i < world_size_; i++) { - Metadata *rank_meta; + Signal *rank_sg; if (i != rank_) { char *handle = open_ipc_handle(&handles[i]); handle += offsets[i]; - rank_meta = (Metadata *)handle; + rank_sg = (Signal *)handle; } else { - rank_meta = meta_; + rank_sg = self_sg_; } - sg_.signals[i] = &rank_meta->sg; + sg_.signals[i] = rank_sg; } } @@ -492,6 +413,10 @@ class CustomAllreduce { "custom allreduce currently requires input length to be multiple " "of " + std::to_string(d)); + if (block_limit > kMaxBlocks) + throw std::runtime_error("max supported block limit is " + + std::to_string(kMaxBlocks) + ". 
Got " + + std::to_string(block_limit)); RankData *ptrs; cudaStreamCaptureStatus status; @@ -512,9 +437,9 @@ class CustomAllreduce { size /= d; auto bytes = size * sizeof(typename packed_t::P); int blocks = std::min(block_limit, (size + threads - 1) / threads); -#define KL(ngpus, name) \ - name \ - <<>>(ptrs, sg_, meta_, output, rank_, size); +#define KL(ngpus, name) \ + name<<>>(ptrs, sg_, self_sg_, output, \ + rank_, size); #define REDUCE_CASE(ngpus) \ case ngpus: { \ if (world_size_ == 2) { \ @@ -526,8 +451,6 @@ class CustomAllreduce { } else { \ KL(ngpus, cross_device_reduce_2stage); \ } \ - } else { \ - KL(ngpus, cross_device_reduce_half_butterfly); \ } \ break; \ } @@ -556,7 +479,7 @@ class CustomAllreduce { /** * To inspect PTX/SASS, copy paste this header file to compiler explorer and add a template instantiation: - * template void CustomAllreduce::allreduce(cudaStream_t, half *, half *, - int, int, int); + * template void vllm::CustomAllreduce::allreduce(cudaStream_t, half *, + half *, int, int, int); */ } // namespace vllm diff --git a/csrc/custom_all_reduce_test.cu b/csrc/custom_all_reduce_test.cu index 6b094e2fdc9ba..c34a50389c21c 100644 --- a/csrc/custom_all_reduce_test.cu +++ b/csrc/custom_all_reduce_test.cu @@ -92,7 +92,7 @@ __global__ void gen_data(curandState_t *state, T *data, double *ground_truth, template void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, - int data_size) { + int data_size, bool performance_test) { T *result; cudaStream_t stream; CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); @@ -101,7 +101,7 @@ void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, cudaIpcMemHandle_t self_data_handle; cudaIpcMemHandle_t data_handles[8]; - vllm::Metadata *buffer; + vllm::Signal *buffer; T *self_data_copy; /** * Allocate IPC buffer @@ -115,9 +115,9 @@ void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, * convenience. 
*/ CUDACHECK( - cudaMalloc(&buffer, 2 * data_size * sizeof(T) + sizeof(vllm::Metadata))); - CUDACHECK(cudaMemset(buffer, 0, - 2 * data_size * sizeof(T) + sizeof(vllm::Metadata))); + cudaMalloc(&buffer, 2 * data_size * sizeof(T) + sizeof(vllm::Signal))); + CUDACHECK( + cudaMemset(buffer, 0, 2 * data_size * sizeof(T) + sizeof(vllm::Signal))); CUDACHECK(cudaMalloc(&self_data_copy, data_size * sizeof(T))); CUDACHECK(cudaIpcGetMemHandle(&self_data_handle, buffer)); @@ -133,7 +133,7 @@ void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, offsets, myRank); auto *self_data = reinterpret_cast(reinterpret_cast(buffer) + - sizeof(vllm::Metadata) + data_size * sizeof(T)); + sizeof(vllm::Signal) + data_size * sizeof(T)); // hack buffer registration { std::vector handles; @@ -143,8 +143,8 @@ void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, char *end = (char *)&data_handles[i + 1]; handles.emplace_back(begin, end); } - std::vector offsets( - nRanks, sizeof(vllm::Metadata) + data_size * sizeof(T)); + std::vector offsets(nRanks, + sizeof(vllm::Signal) + data_size * sizeof(T)); fa.register_buffer(handles, offsets, self_data); } @@ -169,81 +169,112 @@ void run(int myRank, int nRanks, ncclComm_t &comm, int threads, int block_limit, } else { ncclDtype = ncclFloat; } + double *nccl_result, *my_result; + CUDACHECK(cudaMallocHost(&nccl_result, data_size * sizeof(double))); + CUDACHECK(cudaMallocHost(&my_result, data_size * sizeof(double))); + if (performance_test) { + dummy_kernel<<<1, 1, 0, stream>>>(); + constexpr int warmup_iters = 5; + constexpr int num_iters = 100; + // warmup + for (int i = 0; i < warmup_iters; i++) { + NCCLCHECK(ncclAllReduce(result, result, data_size, ncclDtype, ncclSum, + comm, stream)); + } + CUDACHECK(cudaEventRecord(start, stream)); + for (int i = 0; i < num_iters; i++) { + NCCLCHECK(ncclAllReduce(result, result, data_size, ncclDtype, ncclSum, + comm, stream)); + } + CUDACHECK(cudaEventRecord(stop, stream)); + CUDACHECK(cudaStreamSynchronize(stream)); + float allreduce_ms = 0; + cudaEventElapsedTime(&allreduce_ms, start, stop); - dummy_kernel<<<1, 1, 0, stream>>>(); - constexpr int warmup_iters = 5; - constexpr int num_iters = 25; - // warmup - for (int i = 0; i < warmup_iters; i++) { - NCCLCHECK(ncclAllReduce(result, result, data_size, ncclDtype, ncclSum, comm, - stream)); - } - CUDACHECK(cudaEventRecord(start, stream)); - for (int i = 0; i < num_iters; i++) { - NCCLCHECK(ncclAllReduce(result, result, data_size, ncclDtype, ncclSum, comm, - stream)); - } - CUDACHECK(cudaEventRecord(stop, stream)); - CUDACHECK(cudaStreamSynchronize(stream)); - float allreduce_ms = 0; - cudaEventElapsedTime(&allreduce_ms, start, stop); - - // if (myRank == 1) dummy_kernel<<<1, 1, 0, stream>>>(); - // set_data<<<16, 1024, 0, stream>>>(self_data, data_size, myRank); - - dummy_kernel<<<1, 1, 0, stream>>>(); - // warm up - for (int i = 0; i < warmup_iters; i++) { - fa.allreduce(stream, self_data, result, data_size, threads, block_limit); - } - CUDACHECK(cudaEventRecord(start, stream)); - for (int i = 0; i < num_iters; i++) { - fa.allreduce(stream, self_data, result, data_size, threads, block_limit); - } - CUDACHECK(cudaEventRecord(stop, stream)); - CUDACHECK(cudaStreamSynchronize(stream)); - - float duration_ms = 0; - cudaEventElapsedTime(&duration_ms, start, stop); - if (myRank == 0) - printf( - "Rank %d done, nGPUs:%d, sz (kb): %d, %d, %d, my time:%.2fus, nccl " - "time:%.2fus\n", - myRank, nRanks, data_size * sizeof(T) / 1024, threads, 
block_limit, - duration_ms * 1e3 / num_iters, allreduce_ms * 1e3 / num_iters); + dummy_kernel<<<1, 1, 0, stream>>>(); + // warm up + for (int i = 0; i < warmup_iters; i++) { + fa.allreduce(stream, self_data, result, data_size, threads, + block_limit); + } + CUDACHECK(cudaEventRecord(start, stream)); + for (int i = 0; i < num_iters; i++) { + fa.allreduce(stream, self_data, result, data_size, threads, + block_limit); + } + CUDACHECK(cudaEventRecord(stop, stream)); + CUDACHECK(cudaStreamSynchronize(stream)); - // And wait for all the queued up work to complete - CUDACHECK(cudaStreamSynchronize(stream)); + float duration_ms = 0; + cudaEventElapsedTime(&duration_ms, start, stop); + if (myRank == 0) + printf( + "Rank %d done, nGPUs:%d, sz (kb): %d, %d, %d, my time:%.2fus, nccl " + "time:%.2fus\n", + myRank, nRanks, data_size * sizeof(T) / 1024, threads, block_limit, + duration_ms * 1e3 / num_iters, allreduce_ms * 1e3 / num_iters); - NCCLCHECK(ncclAllReduce(self_data_copy, self_data, data_size, ncclDtype, - ncclSum, comm, stream)); + // And wait for all the queued up work to complete + CUDACHECK(cudaStreamSynchronize(stream)); - double *nccl_result, *my_result; - CUDACHECK(cudaMallocHost(&nccl_result, data_size * sizeof(double))); - CUDACHECK(cudaMallocHost(&my_result, data_size * sizeof(double))); + NCCLCHECK(ncclAllReduce(self_data_copy, self_data, data_size, ncclDtype, + ncclSum, comm, stream)); - convert_data<<<108, 1024, 0, stream>>>(self_data, result, nccl_result, - my_result, data_size); - CUDACHECK(cudaStreamSynchronize(stream)); + convert_data<<<108, 1024, 0, stream>>>(self_data, result, nccl_result, + my_result, data_size); + CUDACHECK(cudaStreamSynchronize(stream)); - for (unsigned long j = 0; j < data_size; j++) { - auto diff = abs(nccl_result[j] - my_result[j]); - if (diff >= 1e-2) { - printf("Rank %d: Verification mismatch at %lld: %f != (my) %f, gt=%f\n", - myRank, j, nccl_result[j], my_result[j], ground_truth[j]); - break; + for (unsigned long j = 0; j < data_size; j++) { + auto diff = abs(nccl_result[j] - my_result[j]); + if (diff >= 4e-2) { + printf("Rank %d: Verification mismatch at %lld: %f != (my) %f, gt=%f\n", + myRank, j, nccl_result[j], my_result[j], ground_truth[j]); + break; + } } - } + long double nccl_diffs = 0.0; + long double my_diffs = 0.0; + for (int j = 0; j < data_size; j++) { + nccl_diffs += abs(nccl_result[j] - ground_truth[j]); + my_diffs += abs(my_result[j] - ground_truth[j]); + } + if (myRank == 0) + std::cout << "average abs diffs: nccl: " << nccl_diffs / data_size + << " me: " << my_diffs / data_size << std::endl; + } else { + for (int i = 0; i < 100; i++) { + fa.allreduce(stream, self_data, result, data_size, threads, + block_limit); + CUDACHECK(cudaStreamSynchronize(stream)); + NCCLCHECK(ncclAllReduce(self_data, self_data_copy, data_size, ncclDtype, + ncclSum, comm, stream)); + convert_data<<<108, 1024, 0, stream>>>( + self_data_copy, result, nccl_result, my_result, data_size); + CUDACHECK(cudaStreamSynchronize(stream)); - long double nccl_diffs = 0.0; - long double my_diffs = 0.0; - for (int j = 0; j < data_size; j++) { - nccl_diffs += abs(nccl_result[j] - ground_truth[j]); - my_diffs += abs(my_result[j] - ground_truth[j]); + for (unsigned long j = 0; j < data_size; j++) { + auto diff = abs(nccl_result[j] - my_result[j]); + if (diff >= 4e-2) { + printf( + "Rank %d: Verification mismatch at %lld: %f != (my) %f, gt=%f\n", + myRank, j, nccl_result[j], my_result[j], ground_truth[j]); + break; + } + } + } + if (myRank == 0) + printf("Test passed: nGPUs:%d, 
sz (kb): %d, %d, %d\n", nRanks, + data_size * sizeof(T) / 1024, threads, block_limit); + // long double nccl_diffs = 0.0; + // long double my_diffs = 0.0; + // for (int j = 0; j < data_size; j++) { + // nccl_diffs += abs(nccl_result[j] - ground_truth[j]); + // my_diffs += abs(my_result[j] - ground_truth[j]); + // } + // if (myRank == 0) + // std::cout << "average abs diffs: nccl: " << nccl_diffs / data_size + // << " me: " << my_diffs / data_size << std::endl; } - if (myRank == 0) - std::cout << "average abs diffs: nccl: " << nccl_diffs / data_size - << " me: " << my_diffs / data_size << std::endl; CUDACHECK(cudaFree(result)); CUDACHECK(cudaFree(self_data_copy)); @@ -269,14 +300,15 @@ int main(int argc, char **argv) { MPI_COMM_WORLD)); NCCLCHECK(ncclCommInitRank(&comm, nRanks, id, myRank)); + bool performance_test = true; cudaProfilerStart(); // for (int threads : {256, 512}) { // for (int block_limit = 16; block_limit < 112; block_limit += 4) { // run(myRank, nRanks, comm, threads, block_limit, 4096 * 1024); // } // } - for (int sz = 512; sz <= (32 << 20); sz *= 2) { - run(myRank, nRanks, comm, 512, 36, sz + 8 * 50); + for (int sz = 512; sz <= (8 << 20); sz *= 2) { + run(myRank, nRanks, comm, 512, 36, sz + 8 * 47, performance_test); } cudaProfilerStop(); diff --git a/vllm/config.py b/vllm/config.py index a86114f35e916..6dfb51586562b 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -506,15 +506,6 @@ def _verify_args(self) -> None: raise ValueError("Unable to use nsight profiling unless workers " "run with Ray.") - # FIXME(woosuk): Fix the stability issues and re-enable the custom - # all-reduce kernel. - if not self.disable_custom_all_reduce and self.world_size > 1: - self.disable_custom_all_reduce = True - logger.info( - "Custom all-reduce kernels are temporarily disabled due to " - "stability issues. We will re-enable them once the issues are " - "resolved.") - class SchedulerConfig: """Scheduler configuration. diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 1f463bdaaedc3..e9b3d46d4bb61 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -83,7 +83,7 @@ def __init__( swap_space: int = 4, enforce_eager: bool = False, max_context_len_to_capture: int = 8192, - disable_custom_all_reduce: bool = False, + disable_custom_all_reduce: bool = True, **kwargs, ) -> None: if "disable_log_stats" not in kwargs: diff --git a/vllm/model_executor/parallel_utils/custom_all_reduce.py b/vllm/model_executor/parallel_utils/custom_all_reduce.py index 0c749c0484fc5..396be89492367 100644 --- a/vllm/model_executor/parallel_utils/custom_all_reduce.py +++ b/vllm/model_executor/parallel_utils/custom_all_reduce.py @@ -37,16 +37,23 @@ def init_custom_ar() -> None: logger.warn( "Custom allreduce is disabled due to an unsupported world size: " "%d. Supported world sizes: %s. To silence this warning, specify" - "disable_custom_all_reduce=True explicitly.", world_size, + " disable_custom_all_reduce=True explicitly.", world_size, str(_SUPPORTED_WORLD_SIZES)) return if not _can_p2p(rank, world_size): logger.warn( "Custom allreduce is disabled because your platform lacks GPU P2P" - " capability. To silence this warning, specify" - "disable_custom_all_reduce=True explicitly.") + " capability or P2P test failed. 
To silence this warning, specify" + " disable_custom_all_reduce=True explicitly.") return - _CA_HANDLE = CustomAllreduce(rank, world_size) + full_nvlink = _is_full_nvlink(rank, world_size) + if world_size > 2 and not full_nvlink: + logger.warn( + "Custom allreduce is disabled because it's not supported on more" + " than two PCIe-only GPUs. To silence this warning, specify" + " disable_custom_all_reduce=True explicitly.") + return + _CA_HANDLE = CustomAllreduce(rank, world_size, full_nvlink) def begin_capture() -> None: @@ -134,18 +141,48 @@ def _is_full_nvlink(rank, world_size): def _can_p2p(rank: int, world_size: int) -> bool: + num_dev = torch.cuda.device_count() + # note: num dev can be larger than world_size if we're only using + # first few GPUs + if num_dev < world_size: + logger.warn( + "Cannot test GPU P2P because not all GPUs are visible to the " + "current process. This might be the case if 'CUDA_VISIBLE_DEVICES'" + " is set.") + return False for i in range(world_size): if i == rank: continue if not torch.cuda.can_device_access_peer(rank, i): return False + # on some platforms, P2P support might be buggy and we need + # additional checks. See also: + # https://github.com/vllm-project/vllm/issues/2728 + if not _can_actually_p2p(rank, i): + return False return True +# code partly borrowed from +# https://github.com/turboderp/exllamav2/blob/1c67f97f3d2a968605a9c31ab791a05c85bb7879/exllamav2/compat.py#L10 +# License: MIT +def _can_actually_p2p(idx_a, idx_b): + dev_i = f"cuda:{idx_a}" + dev_j = f"cuda:{idx_b}" + a = torch.randn(5, device=dev_i) + 123.0 + b = a.to(dev_j) + c = b.to(dev_i) + return torch.all(a == c) + + class CustomAllreduce: # max_size: max supported allreduce size - def __init__(self, rank, world_size, max_size=8192 * 1024) -> None: + def __init__(self, + rank, + world_size, + full_nvlink, + max_size=8192 * 1024) -> None: # buffers memory are owned by this Python class and passed to C++ # meta data composes of two parts: meta data for synchronization # (256 bytes) and a temporary buffer for storing intermediate @@ -167,11 +204,10 @@ def __init__(self, rank, world_size, max_size=8192 * 1024) -> None: self.max_size = max_size self.world_size = world_size handles, offsets = self._get_ipc_meta(self.meta) - self.full_nvlink = _is_full_nvlink(rank, world_size) + self.full_nvlink = full_nvlink self._ptr = custom_ar.init_custom_ar(self.meta, self.rank_data, handles, offsets, rank, self.full_nvlink) - self.fast_cond = self.full_nvlink or world_size <= 2 self.register_buffer(self.buffer) def _get_ipc_meta(self, inp: torch.Tensor): From cf2f084d56a1293cb08da2393984cdc7685ac019 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Fri, 22 Mar 2024 20:28:14 +0100 Subject: [PATCH 163/196] Dynamic scheduler delay to improve ITL performance (#3279) Co-authored-by: Jan van Lunteren --- tests/core/test_scheduler.py | 34 ++++++++++++++++++++++++++++++++++ vllm/config.py | 4 ++++ vllm/core/scheduler.py | 26 +++++++++++++++++++++++++- vllm/engine/arg_utils.py | 10 +++++++++- 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py index 397101fa86104..4a690e24ec720 100644 --- a/tests/core/test_scheduler.py +++ b/tests/core/test_scheduler.py @@ -1,5 +1,6 @@ from typing import List import pytest # noqa +import time from vllm.config import CacheConfig, SchedulerConfig from vllm.core.scheduler import Scheduler @@ -168,3 +169,36 @@ def test_scheduler_max_seqs(): # and one is prompting. 
_, out = scheduler.schedule() assert set(out.scheduled_seq_groups) == set([all_seq_groups[1]]) + + +def test_scheduler_delay_factor(): + + block_size = 4 + scheduler_config = SchedulerConfig(100, 64, 16, delay_factor=0.5) + cache_config = CacheConfig(block_size, 1.0, 1, "auto") + cache_config.num_cpu_blocks = 8 + cache_config.num_gpu_blocks = 8 + scheduler = Scheduler(scheduler_config, cache_config, None) + + # schedule first prompt + _, seq_group = create_dummy_prompt("0", prompt_length=block_size) + scheduler.add_seq_group(seq_group) + seq_group_meta, out = scheduler.schedule() + assert out.prompt_run + assert seq_group_meta[0].request_id == '0' + + # wait for a second before scheduling next prompt + time.sleep(1) + _, seq_group = create_dummy_prompt("1", prompt_length=block_size) + scheduler.add_seq_group(seq_group) + + # second prompt should *not* be scheduled + seq_group_meta, out = scheduler.schedule() + assert not out.prompt_run + assert seq_group_meta[0].request_id == '0' + + # wait for more than 0.5 second and try again + time.sleep(0.6) + seq_group_meta, out = scheduler.schedule() + assert out.prompt_run + assert seq_group_meta[0].request_id == '1' diff --git a/vllm/config.py b/vllm/config.py index 6dfb51586562b..2003563e4e50e 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -517,6 +517,8 @@ class SchedulerConfig: iteration. max_model_len: Maximum length of a sequence (including prompt and generated text). + delay_factor: Apply a delay (of delay factor multiplied by previous + prompt latency) before scheduling next prompt. """ def __init__( @@ -524,6 +526,7 @@ def __init__( max_num_batched_tokens: Optional[int], max_num_seqs: int, max_model_len: int, + delay_factor: float = 0.0, ) -> None: if max_num_batched_tokens is not None: self.max_num_batched_tokens = max_num_batched_tokens @@ -533,6 +536,7 @@ def __init__( self.max_num_batched_tokens = max(max_model_len, 2048) self.max_num_seqs = max_num_seqs self.max_model_len = max_model_len + self.delay_factor = delay_factor self._verify_args() def _verify_args(self) -> None: diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index be55e8520a55f..4bd0ef360b3ff 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -103,6 +103,13 @@ def __init__( # Sequence groups in the SWAPPED state. self.swapped: Deque[SequenceGroup] = deque() + # Time at previous scheduling step + self.prev_time = 0.0 + # Did we schedule a prompt at previous step? + self.prev_prompt = False + # Latency of the last prompt step + self.last_prompt_latency = 0.0 + @property def lora_enabled(self) -> bool: return bool(self.lora_config) @@ -179,7 +186,7 @@ def _schedule(self) -> SchedulerOutputs: # are added to the back. 
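The delay factor introduced by this commit holds back prompt scheduling until the oldest waiting request has been queued for longer than delay_factor times the latency of the previous prompt step (or until nothing is running), letting the waiting queue fill up so prefills form larger batches. A standalone sketch of that predicate, mirroring the _passed_delay helper added to the Scheduler below (names here are illustrative):

    def passed_delay(now: float,
                     earliest_arrival_time: float,
                     last_prompt_latency: float,
                     delay_factor: float,
                     running_queue_empty: bool) -> bool:
        # Schedule a new prompt batch once the oldest waiting request has waited
        # longer than delay_factor * (latency of the previous prompt step),
        # or immediately if nothing is currently running.
        if delay_factor <= 0:
            return True
        waited = now - earliest_arrival_time
        return waited > delay_factor * last_prompt_latency or running_queue_empty

    # Example: delay_factor=0.5 and the previous prompt step took 40 ms, so a
    # request that has already waited 30 ms may be scheduled (30 ms > 20 ms).
    print(passed_delay(now=1.030, earliest_arrival_time=1.000,
                       last_prompt_latency=0.040, delay_factor=0.5,
                       running_queue_empty=False))  # True

test_scheduler_delay_factor above exercises the same behaviour end to end with delay_factor=0.5.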
leftover_waiting_sequences = deque() num_batched_tokens = 0 - while self.waiting: + while self._passed_delay(now) and self.waiting: seq_group = self.waiting[0] waiting_seqs = seq_group.get_seqs( status=SequenceStatus.WAITING) @@ -246,6 +253,7 @@ def _schedule(self) -> SchedulerOutputs: self.waiting.extendleft(leftover_waiting_sequences) if scheduled or ignored_seq_groups: + self.prev_prompt = True scheduler_outputs = SchedulerOutputs( scheduled_seq_groups=scheduled, prompt_run=True, @@ -491,3 +499,19 @@ def _swap_out( def mark_blocks_as_computed(self, seq_group: SequenceGroup): self.block_manager.mark_blocks_as_computed(seq_group) + + def _passed_delay(self, now: float) -> bool: + if self.prev_prompt: + self.last_prompt_latency = now - self.prev_time + self.prev_time, self.prev_prompt = now, False + # Delay scheduling prompts to let waiting queue fill up + if self.scheduler_config.delay_factor > 0 and self.waiting: + earliest_arrival_time = min( + [e.metrics.arrival_time for e in self.waiting]) + passed_delay = ( + (now - earliest_arrival_time) > + (self.scheduler_config.delay_factor * self.last_prompt_latency) + or not self.running) + else: + passed_delay = True + return passed_delay diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 94c80f4284067..2070686ea6e8e 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -51,6 +51,7 @@ class EngineArgs: max_cpu_loras: Optional[int] = None device: str = 'auto' ray_workers_use_nsight: bool = False + scheduler_delay_factor: float = 0.0 def __post_init__(self): if self.tokenizer is None: @@ -305,6 +306,12 @@ def add_cli_args( default=EngineArgs.device, choices=["auto", "cuda", "neuron"], help='Device type for vLLM execution.') + parser.add_argument( + '--scheduler-delay-factor', + type=float, + default=EngineArgs.scheduler_delay_factor, + help='Apply a delay (of delay factor multiplied by previous' + 'prompt latency) before scheduling next prompt.') return parser @classmethod @@ -342,7 +349,8 @@ def create_engine_configs( ), self.ray_workers_use_nsight) scheduler_config = SchedulerConfig(self.max_num_batched_tokens, self.max_num_seqs, - model_config.max_model_len) + model_config.max_model_len, + self.scheduler_delay_factor) lora_config = LoRAConfig( max_lora_rank=self.max_lora_rank, max_loras=self.max_loras, From bfdb1ba5c3fb14387c69acb1f5067102d8028e56 Mon Sep 17 00:00:00 2001 From: Antoni Baum Date: Fri, 22 Mar 2024 13:44:12 -0700 Subject: [PATCH 164/196] [Core] Improve detokenization performance for prefill (#3469) Co-authored-by: MeloYang --- tests/tokenization/test_detokenize.py | 163 +++++++++++++++++++++++-- vllm/engine/llm_engine.py | 66 ++-------- vllm/transformers_utils/detokenizer.py | 155 +++++++++++++++++++++++ vllm/transformers_utils/tokenizer.py | 90 +++++++++++--- 4 files changed, 385 insertions(+), 89 deletions(-) create mode 100644 vllm/transformers_utils/detokenizer.py diff --git a/tests/tokenization/test_detokenize.py b/tests/tokenization/test_detokenize.py index 4421739390e3b..082034083aebd 100644 --- a/tests/tokenization/test_detokenize.py +++ b/tests/tokenization/test_detokenize.py @@ -1,13 +1,17 @@ import pytest from transformers import AutoTokenizer +from typing import List, Dict +from vllm.sequence import Sequence, Logprob, SamplingParams, SequenceGroup +from vllm.transformers_utils.tokenizer_group import get_tokenizer_group from vllm.transformers_utils.tokenizer import detokenize_incrementally +from vllm.transformers_utils.detokenizer import Detokenizer TRUTH = [ - "Hello here, this 
is a simple test", # noqa: E501 - "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs. It is designed to be used in production environments, where inference and serving", # noqa: E501 - "我很感谢你的热情" # noqa: E501 + "Hello here, this is a simple test", + "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs. It is designed to be used in production environments, where inference and serving", # noqa + "我很感谢你的热情" ] TOKENIZERS = [ "facebook/opt-125m", @@ -24,12 +28,12 @@ def _run_incremental_decode(tokenizer, all_input_ids, - skip_special_tokens: bool): + skip_special_tokens: bool, starting_index: int): decoded_text = "" offset = 0 token_offset = 0 prev_tokens = None - for i in range(len(all_input_ids)): + for i in range(starting_index, len(all_input_ids)): new_tokens, text, offset, token_offset = detokenize_incrementally( tokenizer, all_input_ids[:i + 1], @@ -46,17 +50,152 @@ def _run_incremental_decode(tokenizer, all_input_ids, @pytest.mark.parametrize("truth", TRUTH) +@pytest.mark.parametrize("with_prompt", [True, False]) @pytest.mark.parametrize("tokenizer_id", TOKENIZERS) @pytest.mark.parametrize("skip_special_tokens", (True, False)) -def test_decode_streaming(tokenizer_id, truth, skip_special_tokens): +def test_decode_streaming(tokenizer_id, truth, with_prompt, + skip_special_tokens): tokenizer = AutoTokenizer.from_pretrained(tokenizer_id) - all_input_ids = tokenizer(truth, add_special_tokens=False)["input_ids"] + if with_prompt: + truth_tokens = tokenizer(truth, add_special_tokens=False)["input_ids"] + prompt_input_ids = truth_tokens[:len(truth) // 2] + generated_input_ids = truth_tokens[len(truth) // 2:] + all_input_ids = prompt_input_ids + generated_input_ids + starting_index = len(prompt_input_ids) + prompt = tokenizer.decode(prompt_input_ids, + skip_special_tokens=skip_special_tokens) + generated = truth[len(prompt):] + else: + generated = truth + starting_index = 0 + all_input_ids = tokenizer(truth, add_special_tokens=False)["input_ids"] if skip_special_tokens: - all_input_ids = ([tokenizer.bos_token_id] - if tokenizer.bos_token_id is not None else - []) + all_input_ids + [tokenizer.eos_token_id] + if tokenizer.bos_token_id is not None: + all_input_ids = [tokenizer.bos_token_id] + all_input_ids + starting_index += 1 + all_input_ids = all_input_ids + [tokenizer.eos_token_id] decoded_text = _run_incremental_decode( - tokenizer, all_input_ids, skip_special_tokens=skip_special_tokens) + tokenizer, + all_input_ids, + skip_special_tokens=skip_special_tokens, + starting_index=starting_index) - assert decoded_text == truth + assert decoded_text == generated + + +@pytest.fixture +def detokenizer(tokenizer_name: str) -> Detokenizer: + init_kwargs = dict( + tokenizer_id=tokenizer_name, + enable_lora=False, + max_num_seqs=100, + max_input_length=None, + tokenizer_mode="auto", + trust_remote_code=False, + revision=None, + ) + + tokenizer_group = get_tokenizer_group( + None, + **init_kwargs, + ) + + return Detokenizer(tokenizer_group) + + +@pytest.fixture(name="complete_sequence_token_ids") +def create_complete_sequence_token_ids(complete_sequence: str, + tokenizer_name: str) -> List[int]: + tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) + complete_sequence_token_ids = tokenizer(complete_sequence)["input_ids"] + return complete_sequence_token_ids + + +def create_sequence(prompt_token_ids=None): + prompt_token_ids = prompt_token_ids or [1] + return Sequence( + seq_id=0, + prompt="", + prompt_token_ids=prompt_token_ids, + 
block_size=16, + ) + + +def create_dummy_logprobs( + complete_sequence_token_ids: List[int]) -> List[Dict[int, Logprob]]: + return [{ + token_id: Logprob(logprob=0.0), + token_id + 1: Logprob(logprob=0.1) + } for token_id in complete_sequence_token_ids] + + +@pytest.mark.parametrize("complete_sequence", TRUTH) +@pytest.mark.parametrize("tokenizer_name", TOKENIZERS) +@pytest.mark.parametrize("skip_special_tokens", [True, False]) +def test_decode_sequence_logprobs(complete_sequence: str, + complete_sequence_token_ids: List[int], + detokenizer: Detokenizer, + skip_special_tokens: bool): + """Verify Detokenizer decodes logprobs correctly.""" + sampling_params = SamplingParams(skip_special_tokens=skip_special_tokens, + logprobs=2) + + # Run sequentially. + seq = create_sequence() + dummy_logprobs = create_dummy_logprobs(complete_sequence_token_ids) + sequential_logprobs_text_chosen_token = [] + sequential_logprobs_text_other_token = [] + for new_token, logprobs in zip(complete_sequence_token_ids, + dummy_logprobs): + seq.append_token_id(new_token, logprobs) + detokenizer.decode_sequence_inplace(seq, sampling_params) + sequential_logprobs_text_chosen_token.append( + seq.output_logprobs[-1][new_token].decoded_token) + sequential_logprobs_text_other_token.append( + seq.output_logprobs[-1][new_token + 1].decoded_token) + sequential_result = seq.output_text + + assert sequential_result == "".join(sequential_logprobs_text_chosen_token) + assert sequential_result != "".join(sequential_logprobs_text_other_token) + + if skip_special_tokens: + # Text for logprobs for the chosen token should be the same as the + # generated text. Note that this will only be true if we skip + # special tokens. + assert sequential_result == complete_sequence + + +@pytest.mark.parametrize("complete_sequence", TRUTH) +@pytest.mark.parametrize("tokenizer_name", TOKENIZERS) +@pytest.mark.parametrize("skip_special_tokens", [True]) +def test_decode_prompt_logprobs(complete_sequence: str, + complete_sequence_token_ids: List[int], + detokenizer: Detokenizer, + skip_special_tokens: bool): + """Verify Detokenizer decodes prompt logprobs correctly.""" + sampling_params = SamplingParams(skip_special_tokens=skip_special_tokens, + prompt_logprobs=1) + + # Run sequentially. + seq = create_sequence(complete_sequence_token_ids) + seq_group = SequenceGroup(request_id="1", + seqs=[seq], + sampling_params=sampling_params, + arrival_time=0.0) + dummy_logprobs = create_dummy_logprobs(complete_sequence_token_ids) + detokenizer.decode_prompt_logprobs_inplace(seq_group, dummy_logprobs) + decoded_prompt_logprobs = dummy_logprobs + + if skip_special_tokens: + # Text for logprobs for the chosen token should be the same as the + # prompt text. Note that this will only be true if we skip + # special tokens. 
+ assert complete_sequence == "".join([ + logprobs[token_id].decoded_token for token_id, logprobs in zip( + complete_sequence_token_ids, decoded_prompt_logprobs) + ]) + assert complete_sequence != "".join([ + logprobs[token_id + 1].decoded_token for token_id, logprobs in zip( + complete_sequence_token_ids, decoded_prompt_logprobs) + ]) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 7247828418da5..283b5d9ac44c1 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1,5 +1,5 @@ import time -from typing import Dict, Iterable, List, Optional, Tuple, Type, Union +from typing import Iterable, List, Optional, Tuple, Type, Union from transformers import PreTrainedTokenizer @@ -15,11 +15,11 @@ from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams -from vllm.sequence import (Logprob, SamplerOutput, Sequence, SequenceGroup, +from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup, SequenceGroupOutput, SequenceOutput, SequenceStatus) -from vllm.transformers_utils.tokenizer import detokenize_incrementally from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup, get_tokenizer_group) +from vllm.transformers_utils.detokenizer import Detokenizer from vllm.utils import Counter logger = init_logger(__name__) @@ -97,6 +97,7 @@ def __init__( self._verify_args() self._init_tokenizer() + self.detokenizer = Detokenizer(self.tokenizer) self.seq_counter = Counter() self.model_executor = executor_class(model_config, cache_config, @@ -153,7 +154,7 @@ def __reduce__(self): raise RuntimeError("LLMEngine should not be pickled!") def get_tokenizer(self) -> "PreTrainedTokenizer": - return self.tokenizer.get_lora_tokenizer() + return self.tokenizer.get_lora_tokenizer(None) def get_tokenizer_for_seq(self, sequence: Sequence) -> "PreTrainedTokenizer": @@ -370,13 +371,8 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup, # Process prompt logprobs prompt_logprobs = outputs.prompt_logprobs if prompt_logprobs is not None: - # We can pick any sequence for the prompt. 
- seq = next(iter(seq_group.seqs_dict.values())) - all_token_ids = seq.get_token_ids() - for i, prompt_logprobs_for_token in enumerate(prompt_logprobs): - self._decode_logprobs(seq, seq_group.sampling_params, - prompt_logprobs_for_token, - all_token_ids[:i]) + self.detokenizer.decode_prompt_logprobs_inplace( + seq_group, prompt_logprobs) seq_group.prompt_logprobs = prompt_logprobs # Process samples @@ -420,7 +416,8 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup, child_seqs.append((parent, parent)) for seq, _ in child_seqs: - self._decode_sequence(seq, seq_group.sampling_params) + self.detokenizer.decode_sequence_inplace(seq, + seq_group.sampling_params) self._check_stop(seq, seq_group.sampling_params) # Non-beam search case @@ -713,51 +710,6 @@ def _get_stats(self, time_e2e_requests=time_e2e_requests, ) - def _decode_logprobs(self, seq: Sequence, prms: SamplingParams, - logprobs: Dict[int, Logprob], - all_input_ids: List[int]) -> None: - if not logprobs: - return - for token_id, sample_logprob in logprobs.items(): - if (sample_logprob.decoded_token is None and token_id != -1): - all_input_ids_with_logprob = all_input_ids[:-1] + [token_id] - (_, new_text, prefix_offset, - read_offset) = detokenize_incrementally( - self.get_tokenizer_for_seq(seq), - all_input_ids=all_input_ids_with_logprob, - prev_tokens=seq.tokens, - prefix_offset=seq.prefix_offset, - read_offset=seq.read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms. - spaces_between_special_tokens, - ) - sample_logprob.decoded_token = new_text - - def _decode_sequence(self, seq: Sequence, prms: SamplingParams) -> None: - """Decodes the new token for a sequence.""" - all_input_ids = seq.get_token_ids() - self._decode_logprobs(seq, prms, seq.output_logprobs[-1], - all_input_ids) - - (new_tokens, new_output_text, prefix_offset, - read_offset) = detokenize_incrementally( - self.get_tokenizer_for_seq(seq), - all_input_ids=all_input_ids, - prev_tokens=seq.tokens, - prefix_offset=seq.prefix_offset, - read_offset=seq.read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms.spaces_between_special_tokens, - ) - if seq.tokens is None: - seq.tokens = new_tokens - else: - seq.tokens.extend(new_tokens) - seq.prefix_offset = prefix_offset - seq.read_offset = read_offset - seq.output_text += new_output_text - def _check_stop(self, seq: Sequence, sampling_params: SamplingParams) -> None: """Stop the finished sequences.""" diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py new file mode 100644 index 0000000000000..1f322b3675d02 --- /dev/null +++ b/vllm/transformers_utils/detokenizer.py @@ -0,0 +1,155 @@ +from typing import List, Dict, Optional +from transformers import PreTrainedTokenizer +from vllm.sequence import Sequence, Logprob, SequenceGroup, SamplingParams +from vllm.transformers_utils.tokenizer import (detokenize_incrementally, + convert_prompt_ids_to_tokens) +from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import ( + BaseTokenizerGroup) + +# Used eg. for marking rejected tokens in spec decoding. 
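The Detokenizer module defined below is what the engine changes above now delegate to. A minimal usage sketch, assuming this branch of vLLM is installed and closely following the fixtures in tests/tokenization/test_detokenize.py (the model name and token ids are arbitrary examples, not part of the change):

    from vllm import SamplingParams
    from vllm.sequence import Sequence, Logprob
    from vllm.transformers_utils.detokenizer import Detokenizer
    from vllm.transformers_utils.tokenizer_group import get_tokenizer_group

    tokenizer_group = get_tokenizer_group(
        None,  # no tokenizer pool config
        tokenizer_id="facebook/opt-125m",
        enable_lora=False,
        max_num_seqs=16,
        max_input_length=None,
        tokenizer_mode="auto",
        trust_remote_code=False,
        revision=None,
    )
    detokenizer = Detokenizer(tokenizer_group)

    seq = Sequence(seq_id=0, prompt="", prompt_token_ids=[1], block_size=16)
    params = SamplingParams(skip_special_tokens=True)

    # After each model step the engine appends the sampled token (with its
    # logprobs) and asks the detokenizer to update seq.output_text in place.
    seq.append_token_id(100, {100: Logprob(logprob=0.0)})
    detokenizer.decode_sequence_inplace(seq, params)
    print(seq.output_text)

decode_prompt_logprobs_inplace is driven the same way during prefill, as the llm_engine.py hunk above shows.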
+INVALID_TOKEN_ID = -1 + + +class Detokenizer: + """Provides methods to decode the output of a model into text.""" + + def __init__(self, tokenizer_group: BaseTokenizerGroup): + self.tokenizer_group = tokenizer_group + + def get_tokenizer_for_seq(self, + sequence: Sequence) -> "PreTrainedTokenizer": + """Returns the HF tokenizer to use for a given sequence.""" + return self.tokenizer_group.get_lora_tokenizer(sequence.lora_request) + + def decode_prompt_logprobs_inplace( + self, seq_group: SequenceGroup, + prompt_logprobs: List[Optional[Dict[int, Logprob]]]) -> None: + """Decodes the logprobs for the prompt of a sequence group. + + Args: + seq_group: The sequence group to decode. + prompt_logprobs: The logprobs to decode. + + Returns: + The prompt logprobs with the decoded tokens. + """ + prms = seq_group.sampling_params + # We can pick any sequence for the prompt. + seq = next(iter(seq_group.seqs_dict.values())) + # Only prompt, without the generated token. + all_token_ids = seq.get_token_ids() + prompt_token_ids = all_token_ids[:-1] + tokenizer = self.get_tokenizer_for_seq(seq) + prefix_offset = 0 + read_offset = 0 + next_iter_prefix_offset = 0 + next_iter_read_offset = 0 + next_iter_tokens = [] + prev_tokens = None + + for token_position, prompt_logprobs_for_token in enumerate( + prompt_logprobs): + if not prompt_logprobs_for_token: + continue + for token_id, sample_logprob in prompt_logprobs_for_token.items(): + if (sample_logprob.decoded_token is None + and token_id != INVALID_TOKEN_ID): + prompt_token_ids_with_token = ( + prompt_token_ids[:token_position] + [token_id]) + (new_tokens, new_text, new_prefix_offset, + new_read_offset) = detokenize_incrementally( + tokenizer=tokenizer, + all_input_ids=prompt_token_ids_with_token, + prev_tokens=prev_tokens, + prefix_offset=prefix_offset, + read_offset=read_offset, + skip_special_tokens=prms.skip_special_tokens, + spaces_between_special_tokens=prms. + spaces_between_special_tokens, + ) + + sample_logprob.decoded_token = new_text + + # Use the offsets & prev tokens corresponding to + # real tokens to ensure detokenization is consistent + # actual with prompt. + if token_id == all_token_ids[token_position]: + next_iter_prefix_offset = new_prefix_offset + next_iter_read_offset = new_read_offset + next_iter_tokens = new_tokens + + # Advance to the next token position. + prefix_offset = next_iter_prefix_offset + read_offset = next_iter_read_offset + if prev_tokens is None: + prev_tokens = next_iter_tokens + else: + prev_tokens.extend(next_iter_tokens) + + def decode_sequence_inplace(self, seq: Sequence, + prms: SamplingParams) -> None: + """Decodes the new token for a sequence. In-place operation. + + Args: + seq: The sequence to decode. + prms: The sampling parameters used to generate the sequence. + """ + all_input_ids = seq.get_token_ids() + token_id_generated_this_iteration = all_input_ids[-1] + tokenizer = self.get_tokenizer_for_seq(seq) + + # Convert prompt token IDs to tokens if necessary. + # Do it here so that we don't have to repeat this + # computation for each logprob. 
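+        # (Each logprob candidate decoded below reuses seq.tokens together with
+        # seq.prefix_offset / seq.read_offset as its prev_tokens context, so the
+        # prompt is materialized into tokens only once per sequence.)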
+ if seq.tokens is None: + (seq.tokens, seq.prefix_offset, + seq.read_offset) = convert_prompt_ids_to_tokens( + tokenizer=tokenizer, + prompt_ids=all_input_ids[:-1], + skip_special_tokens=prms.skip_special_tokens, + ) + + (new_tokens, new_decoded_token_text, prefix_offset, + read_offset) = detokenize_incrementally( + tokenizer=tokenizer, + all_input_ids=all_input_ids, + prev_tokens=seq.tokens, + prefix_offset=seq.prefix_offset, + read_offset=seq.read_offset, + skip_special_tokens=prms.skip_special_tokens, + spaces_between_special_tokens=prms.spaces_between_special_tokens, + ) + + # Decode logprobs + logprobs = seq.output_logprobs[-1] + if logprobs: + previous_tokens = all_input_ids[:-1] + for token_id, sample_logprob in logprobs.items(): + # If the token was generated this iteration, + # use the provided text. + if token_id == token_id_generated_this_iteration: + sample_logprob.decoded_token = new_decoded_token_text + continue + + if (sample_logprob.decoded_token is None + and token_id != INVALID_TOKEN_ID): + all_input_ids_with_logprob = previous_tokens + [token_id] + (_, new_text, _, _) = detokenize_incrementally( + tokenizer=tokenizer, + all_input_ids=all_input_ids_with_logprob, + prev_tokens=seq.tokens, + prefix_offset=seq.prefix_offset, + read_offset=seq.read_offset, + skip_special_tokens=prms.skip_special_tokens, + spaces_between_special_tokens=prms. + spaces_between_special_tokens, + ) + sample_logprob.decoded_token = new_text + + if seq.tokens is None: + seq.tokens = new_tokens + else: + seq.tokens.extend(new_tokens) + seq.prefix_offset = prefix_offset + seq.read_offset = read_offset + seq.output_text += new_decoded_token_text diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index f7a1a19a89bcf..eebdacc4903ca 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -158,6 +158,34 @@ def _convert_tokens_to_string_with_added_encoders( return "".join(sub_texts) +# 5 is an arbitrary value that should work for all +# tokenizers (bigger = more conservative). +INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET = 5 + + +def convert_prompt_ids_to_tokens( + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + prompt_ids: List[int], + skip_special_tokens: bool = False, +) -> Tuple[List[str], int, int]: + """Converts the prompt ids to tokens and returns the tokens and offsets + for incremental detokenization. + + Note that not all tokens are converted to strings. Only the tokens that + are necessary for incremental detokenization are converted to strings. + """ + # Offset a little more in case we have special tokens. + prefix_offset = max( + len(prompt_ids) - INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET - 2, 0) + # We do not need to convert the whole prompt to tokens. 
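+    # For example (illustrative numbers): with a 100-token prompt and the
+    # offset constant of 5, only prompt_ids[93:] are converted below; the
+    # returned prefix_offset is then max(7 - 5, 0) = 2 and read_offset is 7,
+    # the number of converted tokens.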
+ new_tokens = tokenizer.convert_ids_to_tokens( + prompt_ids[prefix_offset:], skip_special_tokens=skip_special_tokens) + prefix_offset = max( + len(new_tokens) - INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET, 0) + read_offset = len(new_tokens) + return new_tokens, prefix_offset, read_offset + + # Based on # https://github.com/huggingface/text-generation-inference/blob/v0.9.4/server/text_generation_server/models/model.py#L62C9-L62C15 # under Apache 2.0 license @@ -165,31 +193,53 @@ def detokenize_incrementally( tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], all_input_ids: List[int], prev_tokens: Optional[List[str]], - prefix_offset: int = 0, - read_offset: int = 0, + prefix_offset: int, + read_offset: int, skip_special_tokens: bool = False, spaces_between_special_tokens: bool = True, ) -> Tuple[List[str], str, int, int]: + """Detokenizes the input ids incrementally and returns the new tokens + and the new text. + + If `prev_tokens` is None, this function will convert the input ids to + tokens and return the tokens and the new text. Otherwise, it will return the + new tokens and the new text. + + This function will also return the new prefix offset and the new read + offset to be used in the next iteration. + + The offsets are necessary to defeat cleanup algorithms in the decode which + decide to add a space or not depending on the surrounding ids. + + Args: + tokenizer: The tokenizer to use. + all_input_ids: The input ids. The last id is the new token id. + prev_tokens: The previous tokens. If None, this function will convert + the input ids to tokens and return the tokens and the new text. + prefix_offset: The prefix offset. + read_offset: The read offset. + skip_special_tokens: Whether to skip special tokens. + spaces_between_special_tokens: Whether to add spaces between special + tokens. + """ new_token_id = all_input_ids[-1] # This is the first iteration for this sequence - if prev_tokens is None: - new_tokens = tokenizer.convert_ids_to_tokens( - all_input_ids, skip_special_tokens=skip_special_tokens) - output_tokens = new_tokens - # 5 is an arbitrary value that should work for all - # tokenizers (bigger = more conservative). - # Subtract 1 extra to account for the generated token. - prefix_offset = max(len(output_tokens) - 6, 0) - # If the first new token is a special token, we can't skip 1 extra token - if skip_special_tokens and new_token_id in tokenizer.all_special_ids: - read_offset = max(len(output_tokens), 0) - else: - read_offset = max(len(output_tokens) - 1, 0) - else: - # Put new_token_id in a list so skip_special_tokens is respected - new_tokens = tokenizer.convert_ids_to_tokens( - [new_token_id], skip_special_tokens=skip_special_tokens) - output_tokens = prev_tokens + new_tokens + is_first_iter = prev_tokens is None + if is_first_iter: + (prev_tokens, prefix_offset, + read_offset) = convert_prompt_ids_to_tokens( + tokenizer, + all_input_ids[:-1], + skip_special_tokens=skip_special_tokens) + + # Put new_token_id in a list so skip_special_tokens is respected + new_tokens = tokenizer.convert_ids_to_tokens( + [new_token_id], skip_special_tokens=skip_special_tokens) + output_tokens = prev_tokens + new_tokens + + # If this is the first iteration, return all tokens. 
+ if is_first_iter: + new_tokens = output_tokens # The prefix text is necessary only to defeat cleanup algorithms in # the decode which decide to add a space or not depending on the From 743a0b74021b466088924d1a1228031bdedba896 Mon Sep 17 00:00:00 2001 From: kota-iizuka <64062831+kota-iizuka@users.noreply.github.com> Date: Sun, 24 Mar 2024 03:43:11 +0900 Subject: [PATCH 165/196] [Bugfix] use SoftLockFile instead of LockFile (#3578) --- vllm/model_executor/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 24d78db8d2637..0d7ee269922f4 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -34,7 +34,7 @@ def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): lock_dir = cache_dir if cache_dir is not None else _vllm_filelocks_path os.makedirs(os.path.dirname(lock_dir), exist_ok=True) lock_file_name = model_name_or_path.replace("/", "-") + ".lock" - lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name)) + lock = filelock.SoftFileLock(os.path.join(lock_dir, lock_file_name)) return lock From 3c5ab9b811da7a72af6459bc0c344644ebdc1ef6 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 23 Mar 2024 23:30:56 -0700 Subject: [PATCH 166/196] [Misc] Fix BLOOM copyright notice (#3591) --- vllm/model_executor/models/bloom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 851c475206661..7cf4370236a8b 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -1,7 +1,7 @@ # coding=utf-8 # Adapted from # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py -# Copyright 2023 The CacheFlow team. +# Copyright 2023 The vLLM team. # Copyright 2022 HuggingFace Inc. team and BigScience workshop. # # Licensed under the Apache License, Version 2.0 (the "License"); From f8a12ecc7f7ebcffe26e1ae405c6aa533fc400cd Mon Sep 17 00:00:00 2001 From: Roger Wang <136131678+ywang96@users.noreply.github.com> Date: Sun, 24 Mar 2024 06:32:45 -0700 Subject: [PATCH 167/196] [Misc] Bump transformers version (#3592) --- requirements-rocm.txt | 4 ++-- requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 07d94cd94f5fa..6acf70695cef8 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -7,9 +7,9 @@ ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy tokenizers>=0.15.0 -transformers >= 4.39.0 # Required for StarCoder2. +transformers >= 4.39.1 # Required for StarCoder2 & Llava. fastapi uvicorn[standard] pydantic >= 2.0 # Required for OpenAI server. prometheus_client >= 0.18.0 -outlines == 0.0.34 \ No newline at end of file +outlines == 0.0.34 diff --git a/requirements.txt b/requirements.txt index e136defad4943..eb9977d93dd8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ ray >= 2.9 sentencepiece # Required for LLaMA tokenizer. numpy torch == 2.1.2 -transformers >= 4.39.0 # Required for StarCoder2. +transformers >= 4.39.1 # Required for StarCoder2 & Llava. xformers == 0.0.23.post1 # Required for CUDA 12.1. 
fastapi uvicorn[standard] From af9e53496fc4dfc01b4680c1f16e38687cb3a91a Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 24 Mar 2024 06:34:01 -0700 Subject: [PATCH 168/196] [BugFix] Fix Falcon tied embeddings (#3590) Co-authored-by: 44670 <44670@users.noreply.github.com> --- vllm/model_executor/models/falcon.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 7626dbe62293f..0a01796a96416 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -37,7 +37,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( - VocabParallelEmbedding, ParallelLMHead) + VocabParallelEmbedding) from vllm.model_executor.parallel_utils.communication_op import ( tensor_model_parallel_all_reduce) from vllm.model_executor.parallel_utils.parallel_state import ( @@ -370,10 +370,7 @@ def __init__( self.config = config self.linear_method = linear_method self.transformer = FalconModel(config, linear_method) - self.lm_head = ParallelLMHead( - config.vocab_size, - config.hidden_size, - ) + self.lm_head_weight = self.transformer.word_embeddings.weight self.logits_processor = LogitsProcessor(config.vocab_size) self.sampler = Sampler() @@ -394,7 +391,7 @@ def forward( def compute_logits(self, hidden_states: torch.Tensor, sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head.weight, hidden_states, + logits = self.logits_processor(self.lm_head_weight, hidden_states, sampling_metadata) return logits @@ -419,9 +416,12 @@ def load_weights(self, else: total_num_kv_heads = total_num_heads num_query_heads_per_kv_head = total_num_heads // total_num_kv_heads - params_dict = dict(self.named_parameters()) + params_dict = dict(self.named_parameters(remove_duplicate=False)) for name, loaded_weight in hf_model_weights_iterator( model_name_or_path, cache_dir, load_format, revision): + if name == "lm_head.weight": + # Falcon uses tied embeddings. + continue # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue From d3c6ea8c6067b91478a5324ba55727e744eb7238 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 14:57:02 +0000 Subject: [PATCH 169/196] initial merge --- tests/models/test_models_logprobs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/models/test_models_logprobs.py b/tests/models/test_models_logprobs.py index 8878510bd0a93..1211f3f8837ee 100644 --- a/tests/models/test_models_logprobs.py +++ b/tests/models/test_models_logprobs.py @@ -16,12 +16,14 @@ "gpt2", "bigcode/tiny_starcoder_py", "EleutherAI/gpt-j-6b", - "EleutherAI/pythia-1b", # Switched to 1b model, 70m model logits too unstable. # noqa - "bigscience/bloom-1b1", # Switched to 1b model, 560m model logits too unstable. # noqa - # "mosaicml/mpt-7b", # Failing on the hf_runner, ignore for now. # noqa + "EleutherAI/pythia-1b", + "bigscience/bloom-1b1", + # "mosaicml/mpt-7b", # vLLM upsbug in mpt right now # noqa "microsoft/phi-2", - # "stabilityai/stablelm-3b-4e1t", # vLLM bug looking up model in ModelRegistry, ignore for now. # noqa - # "allenai/OLMo-1B", # Failing on the hf_runner, ignore for now. 
(Wait for https://github.com/allenai/OLMo/pull/451 to land in transformers) # noqa + "stabilityai/stablelm-3b-4e1t", + "allenai/OLMo-1B", + "bigcode/starcoder2-3b", + "Qwen/Qwen1.5-0.5B", ] From a828ef3afc8dd148e6fbc0b99b21660bb9a23e76 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:07:10 +0000 Subject: [PATCH 170/196] cleanup benchmark_prefix caching --- benchmarks/benchmark_prefix_caching.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index 5867e3b171919..dc18f181bd472 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -1,13 +1,10 @@ -# flake8: noqa -# UPSTREAM SYNC: noqa is required for passing ruff run on nm-automation - import argparse import time from vllm import LLM from vllm import SamplingParams -PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" +PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. 
Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" # noqa: E501 def test_prefix(llm=None, sampling_params=None, prompts=None): From 6f6ab1cfc0be85a145720681b9b0a6d086607435 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:07:26 +0000 Subject: [PATCH 171/196] cleanup pybind --- csrc/pybind.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 6ab8843ca9e65..574a7a2a3de43 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -68,7 +68,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ops.def("awq_gemm", &awq_gemm, "Quantized GEMM for AWQ"); ops.def("marlin_gemm", &marlin_gemm, "Marlin Optimized Quantized GEMM for GPTQ"); ops.def("awq_dequantize", &awq_dequantize, "Dequantization for AWQ"); - ops.def("marlin_gemm", &marlin_gemm, "Marlin Optimized Quantized GEMM for GPTQ"); #endif ops.def("gptq_gemm", &gptq_gemm, "Quantized GEMM for GPTQ"); From 03b78a4cf64faabee6587d7f09f2a32352415851 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:10:42 +0000 Subject: [PATCH 172/196] cleanup requirements-dev.txt --- requirements-dev.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 00fa132b14c21..51fa57f068003 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - # formatting yapf==0.32.0 toml==0.10.2 From 8c96a1c1f86460c2aa065b504fe5d8886accd76f Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:16:05 +0000 Subject: [PATCH 173/196] cleanup test skip comments --- tests/kernels/test_attention.py | 4 +++- tests/kernels/test_cache.py | 4 +++- tests/kernels/test_prefix_prefill.py | 4 +++- tests/lora/test_layers.py | 7 +++++-- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/kernels/test_attention.py b/tests/kernels/test_attention.py index f03c77b0b8e34..c5e00b0f2ff0b 100644 --- a/tests/kernels/test_attention.py +++ b/tests/kernels/test_attention.py @@ -1,3 +1,4 @@ +# UPSTREAM SYNC: this file may need attention import random from typing import List, Optional, Tuple @@ -134,8 +135,9 @@ def test_paged_attention( seed: int, device: str, ) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if (kv_cache_dtype == "fp8_e5m2" and device != "cuda:0"): - pytest.skip("Skip cuda:1 test for fp8 attention") + pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") random.seed(seed) torch.random.manual_seed(seed) diff --git a/tests/kernels/test_cache.py b/tests/kernels/test_cache.py index c12414a4cf05b..c5b39e51d55c2 100644 --- a/tests/kernels/test_cache.py +++ b/tests/kernels/test_cache.py @@ -1,3 +1,4 @@ +# UPSTREAM SYNC: this file may need attention import random import pytest @@ -51,8 +52,9 @@ def test_copy_blocks( kv_cache_dtype: str, device: str, ) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if (kv_cache_dtype == "fp8_e5m2" and device != "cuda:0"): - pytest.skip("Skip cuda:1 test for fp8 attention") + pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") random.seed(seed) 
torch.random.manual_seed(seed) diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index d41428e0a9ad3..831597c403229 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -1,3 +1,4 @@ +# UPSTREAM SYNC: this file may need attention import random import pytest import time @@ -30,8 +31,9 @@ def test_contexted_kv_attention( dtype: torch.dtype, device: str, ) -> None: + # UPSTREAM SYNC: this is needed to pass multi-gpu tests if device != "cuda:0": - pytest.skip("Skipping context fwd attention for cuda > 0 for MVP") + pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") random.seed(0) torch.manual_seed(0) diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 4f3b5b2037dd6..7b3a73bd98eff 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -1,3 +1,4 @@ +# UPSTREAM SYNC: this file may need attention import pytest import random from copy import deepcopy @@ -172,6 +173,7 @@ def create_random_inputs( @pytest.mark.parametrize("num_loras", [1, 2, 4, 8]) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_embeddings(dist_init, num_loras, device) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if device != "cuda:0": pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") @@ -262,11 +264,10 @@ def create_random_embedding_layer(): @torch.inference_mode() -# @pytest.mark.skip( -# reason="Fails when loras are in any slot other than the first.") @pytest.mark.parametrize("num_loras", [1, 2, 4, 8]) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_embeddings_with_new_embeddings(dist_init, num_loras, device) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if device != "cuda:0": pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") @@ -522,6 +523,7 @@ def _pretest(): @pytest.mark.parametrize("orientation", ["row", "column"]) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_linear_parallel(dist_init, num_loras, orientation, device) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if device != "cuda:0": pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") @@ -623,6 +625,7 @@ def create_random_linear_parallel_layer(): @pytest.mark.parametrize("repeats", [2, 3]) @pytest.mark.parametrize("device", CUDA_DEVICES) def test_column_parallel_packed(dist_init, num_loras, repeats, device) -> None: + # UPSTREAM SYNC: needed to pass multi-gpu tests if device != "cuda:0": pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") From 119bd05f504380cac7722a69bf69a41db7a55adb Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:19:26 +0000 Subject: [PATCH 174/196] cleanup model comments --- tests/models/test_mistral.py | 4 ++-- tests/models/test_models.py | 4 ++-- tests/models/test_models_logprobs.py | 2 +- tests/samplers/test_sampler.py | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py index b6a031f2c7e18..1f9d2661f7f6b 100644 --- a/tests/models/test_mistral.py +++ b/tests/models/test_mistral.py @@ -1,4 +1,3 @@ -# This file has been modified by Neural Magic """Compare the outputs of HF and vLLM for Mistral models using greedy sampling. Run `pytest tests/models/test_mistral.py --forked`. @@ -10,7 +9,8 @@ ] -@pytest.mark.skip("running these on a10g results in process getting killed") +# UPSTREAM SYNC: we run OOM on the A10g instances. 
+@pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("max_tokens", [128]) diff --git a/tests/models/test_models.py b/tests/models/test_models.py index e95739dae77fd..b1636b4db0ee1 100644 --- a/tests/models/test_models.py +++ b/tests/models/test_models.py @@ -1,4 +1,3 @@ -# This file has been modified by Neural Magic # UPSTREAM SYNC: if any new models are added to this file, add them # to test_models_logprobs.py as well """Compare the outputs of HF and vLLM when using greedy sampling. @@ -27,7 +26,8 @@ ] -@pytest.mark.skip("running these on a10g results in process getting killed") +# UPSTREAM SYNC: we run OOM on the A10g instances. +@pytest.mark.skip("Not enough memory in automation testing.") @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["float"]) @pytest.mark.parametrize("max_tokens", [128]) diff --git a/tests/models/test_models_logprobs.py b/tests/models/test_models_logprobs.py index 1211f3f8837ee..c2ab3c36e69d5 100644 --- a/tests/models/test_models_logprobs.py +++ b/tests/models/test_models_logprobs.py @@ -18,7 +18,7 @@ "EleutherAI/gpt-j-6b", "EleutherAI/pythia-1b", "bigscience/bloom-1b1", - # "mosaicml/mpt-7b", # vLLM upsbug in mpt right now # noqa + # "mosaicml/mpt-7b", # vLLM upstream bug in mpt right now # noqa "microsoft/phi-2", "stabilityai/stablelm-3b-4e1t", "allenai/OLMo-1B", diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index f4808999e4531..ffe6951237bd3 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -1,3 +1,4 @@ +# UPSTREAM SYNC: devices need to be passed around to pass multi-gpu automation tests import random from typing import Tuple, List from unittest.mock import patch From 018c9028da17e026e25630033a928f7cfd8ee07b Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:22:24 +0000 Subject: [PATCH 175/196] cleanup sampler --- tests/samplers/test_sampler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index ffe6951237bd3..90de805ccb2bc 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -33,8 +33,8 @@ def _prepare_test( fake_logits = torch.full((batch_size, vocab_size), 1e-2, dtype=input_tensor.dtype) - # UPSTREAM SYNC: passing device required for multi-gpu tests sampler = MockLogitsSampler(fake_logits) + # UPSTREAM SYNC: passing device required for multi-gpu tests model_runner = ModelRunner(None, None, None, DeviceConfig(device=device), None) return input_tensor, fake_logits, sampler, model_runner @@ -78,6 +78,7 @@ def test_sampler_all_greedy(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) + # UPSTREAM SYNC: passing device required for multi-gpu tests input_tensor, fake_logits, sampler, model_runner = _prepare_test( batch_size, device) @@ -98,6 +99,7 @@ def test_sampler_all_random(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) + # UPSTREAM SYNC: passing device required for multi-gpu tests input_tensor, fake_logits, sampler, model_runner = _prepare_test( batch_size, device) @@ -198,6 +200,7 @@ def test_sampler_mixed(seed: int, device: str): set_random_seed(seed) torch.set_default_device(device) batch_size = random.randint(1, 256) + # UPSTREAM SYNC: passing device required for multi-gpu 
tests input_tensor, fake_logits, sampler, model_runner = _prepare_test( batch_size, device) From 6844a99acd5d57ef0cd3fdf7242d4a4756844a67 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:25:38 +0000 Subject: [PATCH 176/196] cleanup config --- vllm/config.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 0a49a24dad9ea..9a5f3efb2d0d2 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - from typing import TYPE_CHECKING, Optional, Union, ClassVar from dataclasses import dataclass import os @@ -83,6 +81,7 @@ def __init__( tokenizer_revision: Optional[str] = None, max_model_len: Optional[int] = None, quantization: Optional[str] = None, + # UPSTREAM SYNC: keep sparsity sparsity: Optional[str] = None, enforce_eager: bool = False, max_context_len_to_capture: Optional[int] = None, @@ -99,6 +98,7 @@ def __init__( self.code_revision = code_revision self.tokenizer_revision = tokenizer_revision self.quantization = quantization + # UPSTREAM SYNC: keep sparsity self.sparsity = sparsity self.enforce_eager = enforce_eager self.max_context_len_to_capture = max_context_len_to_capture @@ -127,6 +127,7 @@ def __init__( self._verify_load_format() self._verify_tokenizer_mode() self._verify_quantization() + # UPSTREAM SYNC: keep sparsity self._verify_sparsity() self._verify_cuda_graph() @@ -166,6 +167,7 @@ def _verify_tokenizer_mode(self) -> None: "either 'auto' or 'slow'.") self.tokenizer_mode = tokenizer_mode + # UPSTREAM SYNC: keep sparsity def _verify_sparsity(self) -> None: supported_sparsity = ["sparse_w16a16", "semi_structured_sparse_w16a16"] @@ -200,7 +202,6 @@ def _verify_quantization(self) -> None: # Parse quantization method from the HF model config, if available. hf_quant_config = getattr(self.hf_config, "quantization_config", None) if hf_quant_config is not None: - hf_quant_method = str(hf_quant_config["quant_method"]).lower() # If the GPTQ model is serialized in marlin format, use marlin. From 474ccb7249619eed922e7324e1102415089b5de7 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:28:01 +0000 Subject: [PATCH 177/196] fixed block allocator to match upstream (bad merge) --- tests/kernels/test_prefix_prefill.py | 2 +- vllm/core/block_manager.py | 72 +++------------------------- 2 files changed, 8 insertions(+), 66 deletions(-) diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index 831597c403229..051a79cb0ef44 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -31,7 +31,7 @@ def test_contexted_kv_attention( dtype: torch.dtype, device: str, ) -> None: - # UPSTREAM SYNC: this is needed to pass multi-gpu tests + # UPSTREAM SYNC: needed to pass multi-gpu tests if device != "cuda:0": pytest.skip("Skipping multi-gpu tests for now [ bad test setup ]") diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 026a718b9afcb..857ae58d93124 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -145,12 +145,12 @@ class UncachedBlockAllocator(BlockAllocatorBase): the reference count becomes zero, the block is added back to the free list. 
""" - def __init__(self, - device: Device, - block_size: int, - num_blocks: int, - eviction_policy: EvictionPolicy = EvictionPolicy.LRU, - enable_caching: bool = False) -> None: + def __init__( + self, + device: Device, + block_size: int, + num_blocks: int, + ) -> None: self.device = device self.block_size = block_size self.num_blocks = num_blocks @@ -179,12 +179,7 @@ def free(self, block: PhysicalTokenBlock) -> None: raise ValueError(f"Double free! {block} is already freed.") block.ref_count -= 1 if block.ref_count == 0: - assert block.block_hash not in self.evictor - self.evictor.add(block) - - # If caching is enabled, remove the block from the cached_blocks - if self.enable_caching: - del self.cached_blocks[block.block_hash] + self.free_blocks.append(block) def get_num_free_blocks(self) -> int: return len(self.free_blocks) @@ -539,56 +534,3 @@ def get_num_free_gpu_blocks(self) -> int: def get_num_free_cpu_blocks(self) -> int: return self.cpu_allocator.get_num_free_blocks() - - def access_all_blocks_in_seq( - self, - seq: Sequence, - access_time: float, - ) -> None: - if self.enable_caching: - # Update the last accessed time of all the blocks accessed - # in this step. - block_table = self.block_tables[seq.seq_id] - for block in block_table: - block.last_accessed = access_time - - def compute_full_blocks_in_seq(self, seq: Sequence): - if seq.seq_id not in self.block_tables: - return - max_full_block = seq.get_len() // self.block_size - 1 - block_table = self.block_tables[seq.seq_id] - if max_full_block == -1: - return - for i in reversed(range(max_full_block)): - if block_table[i].computed: - break - block_table[i].computed = True - - def get_all_computed_blocks(self, seq: Sequence) -> List[int]: - if seq.seq_id not in self.block_tables: - return [] - block_table = self.block_tables[seq.seq_id] - # NOTE We exclude the last block to avoid the case where the entire - # prompt is cached. This would cause erroneous behavior in model - # runner. - return [ - b.block_number - for b in takewhile(lambda b: b.computed, block_table[:-1]) - ] - - def get_common_computed_block_ids(self, - seq_group: SequenceGroup) -> List[int]: - # Can return non-empty result only with prefix caching enabled. 
- if not self.enable_caching: - return [] - - ids_list = [ - self.get_all_computed_blocks(seq) - for seq in iter(seq_group.seqs_dict.values()) - ] - return commonprefix([ids for ids in ids_list if ids != []]) - - def mark_blocks_as_computed(self, seq_group: SequenceGroup): - if self.enable_caching: - for seq in seq_group.seqs_dict.values(): - self.compute_full_blocks_in_seq(seq) From ab76a09eb58257b22730436dd4d0d3ee84d26049 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:29:27 +0000 Subject: [PATCH 178/196] cleanup engine args --- vllm/engine/arg_utils.py | 4 +++- vllm/engine/llm_engine.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6cd66041af423..088547c746fb0 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -39,6 +39,7 @@ class EngineArgs: code_revision: Optional[str] = None tokenizer_revision: Optional[str] = None quantization: Optional[str] = None + # UPSTREAM SYNC: keep sparsity argument sparsity: Optional[str] = None enforce_eager: bool = False max_context_len_to_capture: int = 8192 @@ -237,6 +238,7 @@ def add_cli_args( 'None, we assume the model weights are not ' 'quantized and use `dtype` to determine the data ' 'type of the weights.') + # UPSTREAM SYNC: keep sparsity argument parser.add_argument( '--sparsity', '-s', @@ -345,7 +347,7 @@ def create_engine_configs( self.trust_remote_code, self.download_dir, self.load_format, self.dtype, self.seed, self.revision, self.code_revision, self.tokenizer_revision, self.max_model_len, self.quantization, - # UPSTREAM SYNC: make sure sparsity argument is included. + # UPSTREAM SYNC: keep sparsity argument self.sparsity, self.enforce_eager, self.max_context_len_to_capture, self.max_logprobs) cache_config = CacheConfig(self.block_size, diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f668e5f746834..23c1aba0b3ad9 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -15,7 +15,7 @@ from vllm.logger import init_logger from vllm.outputs import RequestOutput from vllm.sampling_params import SamplingParams -from vllm.sequence import (Logprob, SamplerOutput, Sequence, SequenceGroup, +from vllm.sequence import (SamplerOutput, Sequence, SequenceGroup, SequenceGroupOutput, SequenceOutput, SequenceStatus) from vllm.transformers_utils.tokenizer_group import (BaseTokenizerGroup, get_tokenizer_group) From 519c6fae34f24df4ea1f3576b655b36537887455 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:29:51 +0000 Subject: [PATCH 179/196] cleanup llm-engine --- vllm/engine/llm_engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 23c1aba0b3ad9..749de65e6e5dd 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -81,6 +81,7 @@ def __init__( f"disable_custom_all_reduce=" f"{parallel_config.disable_custom_all_reduce}, " f"quantization={model_config.quantization}, " + # UPSTREAM SYNC: keep sparsity f"sparsity={model_config.sparsity}, " f"enforce_eager={model_config.enforce_eager}, " f"kv_cache_dtype={cache_config.cache_dtype}, " From 767bf232b66991ba23a7a55eb8c771df2af5755f Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:31:07 +0000 Subject: [PATCH 180/196] cleanup LLM front end --- vllm/entrypoints/llm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 87f678b6025b6..0851558690663 100644 --- 
a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - from typing import List, Optional, Union from tqdm import tqdm @@ -83,6 +81,7 @@ def __init__( tensor_parallel_size: int = 1, dtype: str = "auto", quantization: Optional[str] = None, + # UPSTREAM SYNC: keep sparsity sparsity: Optional[str] = None, revision: Optional[str] = None, tokenizer_revision: Optional[str] = None, @@ -104,6 +103,7 @@ def __init__( tensor_parallel_size=tensor_parallel_size, dtype=dtype, quantization=quantization, + # UPSTREAM SYNC: keep sparsity sparsity=sparsity, revision=revision, tokenizer_revision=tokenizer_revision, From 8788f27dbbd57e2693c8cd85db6335276f79e719 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:32:20 +0000 Subject: [PATCH 181/196] minor cleanups --- vllm/entrypoints/openai/api_server.py | 1 - vllm/model_executor/__init__.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 7967e27173140..a0685a4d38fbe 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -3,7 +3,6 @@ import os import importlib import inspect -import ssl from prometheus_client import make_asgi_app import fastapi diff --git a/vllm/model_executor/__init__.py b/vllm/model_executor/__init__.py index 0cca59d51c163..5f3c78360e2d7 100644 --- a/vllm/model_executor/__init__.py +++ b/vllm/model_executor/__init__.py @@ -1,6 +1,6 @@ from vllm.model_executor.input_metadata import InputMetadata from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.utils import set_random_seed, get_model +from vllm.model_executor.utils import set_random_seed __all__ = [ "InputMetadata", From acd2876d19abe02a91a47351ce2d9e08d8456fca Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:36:00 +0000 Subject: [PATCH 182/196] linear --- vllm/model_executor/layers/linear.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 131f1ea2208b2..6f83435774a56 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional @@ -15,6 +13,7 @@ divide, split_tensor_along_last_dim) from vllm.model_executor.utils import set_weight_attrs from vllm.logger import init_logger +# UPSTREAM SYNC: keep LazyCompressedParameter from vllm.model_executor.layers.parameters import LazyCompressedParameter logger = init_logger(__name__) @@ -203,7 +202,6 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): tp_rank = get_tensor_model_parallel_rank() output_dim = getattr(param, "output_dim", None) param_data = param.data - if output_dim is not None: shard_size = param_data.shape[output_dim] start_idx = tp_rank * shard_size @@ -212,6 +210,7 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) + # UPSTREAM SYNC: keep compress in place if isinstance(param, LazyCompressedParameter): param.compress() @@ -262,6 +261,7 @@ def __init__( linear_method: Optional[LinearMethodBase] = None, ): self.output_sizes = output_sizes + # UPSTREAM SYNC: needed for LazyCompressedParameter self.loaded_shards = set() tp_size = 
get_tensor_model_parallel_world_size() assert all(output_size % tp_size == 0 for output_size in output_sizes) @@ -334,10 +334,12 @@ def weight_loader(self, "MergedColumnParallelLinear, assume the weight is " "the same for all partitions.") + # UPSTREAM SYNC: needed for LazyCompressedParameter self.loaded_shards.add(loaded_shard_id) assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) + # UPSTREAM SYNC: needed for LazyCompressedParameter # This is super hacky for now but we basically want to only compress # once all of the shards are loaded, right now we just check if the # number of shards loaded matches the number of outputs expected, @@ -388,6 +390,7 @@ def __init__( if total_num_kv_heads is None: total_num_kv_heads = total_num_heads self.total_num_kv_heads = total_num_kv_heads + # UPSTREAM SYNC: needed for LazyCompressedParameter self.loaded_shards = set() # Divide the weight matrix along the last dimension. tp_size = get_tensor_model_parallel_world_size() @@ -488,11 +491,11 @@ def weight_loader(self, assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) - self.loaded_shards.add(loaded_shard_id) - + # UPSTREAM SYNC: needed for LazyCompressedParameter # This is super hacky for now but we basically want to only # compress once all of the shards are loaded, for the QKV matrix # this means loading shards "q", "k" and "v" + self.loaded_shards.add(loaded_shard_id) all_shards_loaded = (self.loaded_shards == set(["q", "k", "v"])) if all_shards_loaded and isinstance(param, LazyCompressedParameter): param.compress() @@ -586,6 +589,7 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): assert param_data.shape == loaded_weight.shape param_data.copy_(loaded_weight) + # UPSTREAM SYNC: needed for LazyCompressedParameter if isinstance(param, LazyCompressedParameter): param.compress() From 23e29a90fc0af197d7836c6f4a48d90c03261250 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:37:41 +0000 Subject: [PATCH 183/196] various cleanups --- vllm/model_executor/layers/quantization/__init__.py | 2 -- vllm/model_executor/model_loader.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py index 1891ed4c2b8ee..af27b1844cea4 100644 --- a/vllm/model_executor/layers/quantization/__init__.py +++ b/vllm/model_executor/layers/quantization/__init__.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - from typing import Type from vllm.model_executor.layers.quantization.base_config import ( diff --git a/vllm/model_executor/model_loader.py b/vllm/model_executor/model_loader.py index b19e9083c696d..03cdcc913de6e 100644 --- a/vllm/model_executor/model_loader.py +++ b/vllm/model_executor/model_loader.py @@ -1,4 +1,3 @@ -# This file has been modified by Neural Magic """Utilities for selecting and loading models.""" import contextlib from typing import Type @@ -63,6 +62,7 @@ def get_model(model_config: ModelConfig, device_config: DeviceConfig, f"method {model_config.quantization}. 
Supported dtypes: " f"{supported_dtypes}") linear_method = quant_config.get_linear_method() + # UPSTREAM SYNC: needed to support sparsity if model_config.sparsity is not None: sparse_config = get_sparse_config(model_config) capability = torch.cuda.get_device_capability() From d6bd5dc2439e4cb976841fe2b30089d2092125f5 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:38:43 +0000 Subject: [PATCH 184/196] fixed Neuron --- vllm/model_executor/models/__init__.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index 069830c4d7cb5..efadb1c504ca8 100755 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -4,7 +4,7 @@ import torch.nn as nn from vllm.logger import init_logger -from vllm.utils import is_hip, is_neuron +from vllm.utils import is_hip logger = init_logger(__name__) @@ -63,12 +63,6 @@ "Sliding window attention is not yet supported in ROCm's flash attention", } -# Models supported by Neuron. -_NEURON_SUPPORTED_MODELS = { - "LlamaForCausalLM": "neuron.llama", - "MistralForCausalLM": "neuron.mistral" -} - class ModelRegistry: @@ -85,15 +79,8 @@ def load_model_cls(model_arch: str) -> Optional[Type[nn.Module]]: logger.warning( f"Model architecture {model_arch} is partially supported " "by ROCm: " + _ROCM_PARTIALLY_SUPPORTED_MODELS[model_arch]) - elif is_neuron(): - if model_arch not in _NEURON_SUPPORTED_MODELS: - raise ValueError( - f"Model architecture {model_arch} is not supported by " - "Neuron for now.") module_name, model_cls_name = _MODELS[model_arch] - if is_neuron(): - module_name = _NEURON_SUPPORTED_MODELS[model_arch] module = importlib.import_module( f"vllm.model_executor.models.{module_name}") return getattr(module, model_cls_name, None) From fa7482a6e2ea63784066c3c39c2fea3d158b95fb Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:39:38 +0000 Subject: [PATCH 185/196] removed neuron models --- vllm/model_executor/models/neuron/llama.py | 79 ------------------- vllm/model_executor/models/neuron/mistral.py | 82 -------------------- 2 files changed, 161 deletions(-) delete mode 100644 vllm/model_executor/models/neuron/llama.py delete mode 100755 vllm/model_executor/models/neuron/mistral.py diff --git a/vllm/model_executor/models/neuron/llama.py b/vllm/model_executor/models/neuron/llama.py deleted file mode 100644 index e2856da99d9b1..0000000000000 --- a/vllm/model_executor/models/neuron/llama.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Inference-only LLaMA model compatible with HuggingFace weights.""" -import os -from typing import List, Optional, Tuple - -import torch -from torch import nn -from transformers import LlamaConfig - -from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput - -KVCache = Tuple[torch.Tensor, torch.Tensor] - - -class LlamaForCausalLM(nn.Module): - - def __init__( - self, - config: LlamaConfig, - linear_method=None, - ) -> None: - super().__init__() - self.config = config - self.linear_method = linear_method - self.model = None - self.sampler = Sampler(config.vocab_size) - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> torch.Tensor: - with torch.inference_mode(): - block_size = self.model.context_buckets[-1] 
- if input_metadata.is_prompt: - seq_ids = input_metadata.slot_mapping[:, 0] // block_size - else: - seq_ids = input_metadata.block_tables - logits = self.model(input_ids, - cache_ids=positions, - start_ids=seq_ids.flatten()) - return logits - - def sample( - self, - hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) - return next_tokens - - def load_weights(self, - model_name_or_path: str, - cache_dir: Optional[str] = None, - load_format: str = "auto", - revision: Optional[str] = None, - **kwargs): - from transformers_neuronx.llama.model import LlamaForSampling - - split_model_dir = f"{model_name_or_path}-split" - if os.path.isdir(os.path.join(model_name_or_path, - "pytorch_model.bin")): - split_model_dir = model_name_or_path - elif not os.path.exists(f"{model_name_or_path}-split"): - from transformers.models.llama import LlamaForCausalLM - from transformers_neuronx.module import save_pretrained_split - - hf_model = LlamaForCausalLM.from_pretrained(model_name_or_path, - low_cpu_mem_usage=True) - save_pretrained_split(hf_model, f"{model_name_or_path}-split") - - self.model = LlamaForSampling.from_pretrained(split_model_dir, - **kwargs) - self.model.to_neuron() diff --git a/vllm/model_executor/models/neuron/mistral.py b/vllm/model_executor/models/neuron/mistral.py deleted file mode 100755 index a302cce30abab..0000000000000 --- a/vllm/model_executor/models/neuron/mistral.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Inference-only Mistral model compatible with HuggingFace weights.""" -from typing import List, Optional, Tuple - -import torch -from torch import nn -from transformers import MistralConfig - -from vllm.model_executor.input_metadata import InputMetadata -from vllm.model_executor.layers.sampler import Sampler -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplerOutput -import os - -KVCache = Tuple[torch.Tensor, torch.Tensor] - - -class MistralForCausalLM(nn.Module): - - def __init__( - self, - config: MistralConfig, - linear_method=None, - ) -> None: - super().__init__() - self.config = config - self.linear_method = linear_method - self.model = None - self.lm_head = None - self.sampler = Sampler(config.vocab_size) - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - kv_caches: List[KVCache], - input_metadata: InputMetadata, - ) -> SamplerOutput: - with torch.inference_mode(): - seq_ids = [] - block_size = self.model.context_buckets[-1] - if input_metadata.is_prompt: - seq_ids = input_metadata.slot_mapping[:, 0] // block_size - else: - seq_ids = input_metadata.block_tables - - logits = self.model(input_ids, - cache_ids=positions, - start_ids=seq_ids) - return logits - - def sample( - self, - hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(self.model.chkpt_model.lm_head, - hidden_states, sampling_metadata) - return next_tokens - - def load_weights(self, - model_name_or_path: str, - cache_dir: Optional[str] = None, - load_format: str = "auto", - revision: Optional[str] = None, - **kwargs): - from transformers_neuronx.mistral.model import MistralForSampling - - split_model_dir = f"{model_name_or_path}-split" - if os.path.isdir(os.path.join(model_name_or_path, - "pytorch_model.bin")): - split_model_dir = model_name_or_path - elif not os.path.exists(f"{model_name_or_path}-split"): - from 
transformers import MistralForCausalLM - from transformers_neuronx.module import save_pretrained_split - - hf_model = MistralForCausalLM.from_pretrained( - model_name_or_path, low_cpu_mem_usage=True) - save_pretrained_split(hf_model, f"{model_name_or_path}-split") - - self.model = MistralForSampling.from_pretrained( - split_model_dir, **kwargs) - self.model.to_neuron() From 571bbf7ec5788dbfec22c57d651e69e3fe6257dc Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:46:08 +0000 Subject: [PATCH 186/196] starcoder tmp fix --- vllm/model_executor/utils.py | 17 ------ vllm/model_executor/weight_utils.py | 7 ++- vllm/test_utils.py | 3 +- vllm/transformers_utils/config.py | 9 --- vllm/transformers_utils/configs/starcoder2.py | 55 ------------------- 5 files changed, 6 insertions(+), 85 deletions(-) delete mode 100644 vllm/transformers_utils/configs/starcoder2.py diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py index 0113e3edf0675..336bc1cd005cf 100644 --- a/vllm/model_executor/utils.py +++ b/vllm/model_executor/utils.py @@ -1,18 +1,10 @@ """Utils for model executor.""" import random -import importlib from typing import Any, Dict, Optional import numpy as np import torch -from vllm.config import DeviceConfig, ModelConfig - -DEVICE_TO_MODEL_LOADER_MAP = { - "cuda": "model_loader", - "neuron": "neuron_model_loader", -} - def set_random_seed(seed: int) -> None: random.seed(seed) @@ -41,12 +33,3 @@ def set_weight_attrs( assert not hasattr( weight, key), (f"Overwriting existing tensor attribute: {key}") setattr(weight, key, value) - - -def get_model(model_config: ModelConfig, device_config: DeviceConfig, - **kwargs) -> torch.nn.Module: - model_loader_module = DEVICE_TO_MODEL_LOADER_MAP[device_config.device_type] - imported_model_loader = importlib.import_module( - f"vllm.model_executor.{model_loader_module}") - get_model_fn = imported_model_loader.get_model - return get_model_fn(model_config, device_config, **kwargs) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 4b10d7b14e5be..1c79ee5e08268 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -1,4 +1,3 @@ -# This file has been modified by Neural Magic """Utilities for downloading and initializing model weights.""" import filelock import glob @@ -18,6 +17,7 @@ from vllm.logger import init_logger from vllm.model_executor.layers.quantization import (get_quantization_config, QuantizationConfig) +# UPSTREAM SYNC: needed for sparsity from vllm.model_executor.layers.parameters import LazyCompressedParameter logger = init_logger(__name__) @@ -88,7 +88,8 @@ def convert_bin_to_safetensor_file( raise RuntimeError(f"The output tensors do not match for key {k}") -# TODO(rib-2): Once we define hf_sparsity_config +# UPSTREAM SYNC: needed for sparsity +# TODO: (MLE) load compressed models from here def get_sparse_config(model_config: ModelConfig): from vllm.model_executor.layers.sparsity import get_sparsity_config sparsity_cls = get_sparsity_config(model_config.sparsity) @@ -294,11 +295,13 @@ def convert_pyslice_to_tensor(x: Any) -> torch.Tensor: return x +# UPSTEAM SYNC: Parameter needed for LazyCompressedParameter def default_weight_loader(param: torch.nn.Parameter, loaded_weight: torch.Tensor) -> None: """Default weight loader.""" assert param.size() == loaded_weight.size() param.data.copy_(loaded_weight) + # UPSTREAM SYNC: needed for sparsity if isinstance(param, LazyCompressedParameter): param.compress() diff --git 
a/vllm/test_utils.py b/vllm/test_utils.py index e2f17d286009a..2cbde7cc8e5e9 100644 --- a/vllm/test_utils.py +++ b/vllm/test_utils.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - import ray from vllm.config import ParallelConfig @@ -30,6 +28,7 @@ def multi_process_tensor_parallel( ) -> None: # Using ray helps debugging the error when it failed # as compared to multiprocessing. + # UPSTREAM SYNC: reinit error needed for NM automation ray.init(ignore_reinit_error=True) distributed_init_port = get_open_port() diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index c884cdbe2ae8d..dc226248910e2 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -17,15 +17,6 @@ def get_config(model: str, trust_remote_code: bool, revision: Optional[str] = None, code_revision: Optional[str] = None) -> PretrainedConfig: - # FIXME(woosuk): This is a temporary fix for StarCoder2. - # Remove this when the model is supported by HuggingFace transformers. - if "bigcode" in model and "starcoder2" in model: - config_class = _CONFIG_REGISTRY["starcoder2"] - config = config_class.from_pretrained(model, - revision=revision, - code_revision=code_revision) - return config - try: config = AutoConfig.from_pretrained( model, diff --git a/vllm/transformers_utils/configs/starcoder2.py b/vllm/transformers_utils/configs/starcoder2.py deleted file mode 100644 index 2879cd0445275..0000000000000 --- a/vllm/transformers_utils/configs/starcoder2.py +++ /dev/null @@ -1,55 +0,0 @@ -from transformers import PretrainedConfig - - -class Starcoder2Config(PretrainedConfig): - model_type = "starcoder2" - keys_to_ignore_at_inference = ["past_key_values"] - - def __init__( - self, - vocab_size=49152, - hidden_size=3072, - intermediate_size=12288, - num_hidden_layers=30, - num_attention_heads=24, - num_key_value_heads=2, - hidden_act="gelu_pytorch_tanh", - max_position_embeddings=4096, - initializer_range=0.018042, - norm_epsilon=1e-5, - use_cache=True, - bos_token_id=50256, - eos_token_id=50256, - rope_theta=10000.0, - sliding_window=None, - attention_dropout=0.0, - residual_dropout=0.0, - embedding_dropout=0.0, - use_bias=True, - **kwargs, - ): - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.hidden_size = hidden_size - self.intermediate_size = intermediate_size - self.num_hidden_layers = num_hidden_layers - self.num_attention_heads = num_attention_heads - self.sliding_window = sliding_window - self.use_bias = use_bias - self.num_key_value_heads = num_key_value_heads - self.hidden_act = hidden_act - self.initializer_range = initializer_range - self.norm_epsilon = norm_epsilon - self.use_cache = use_cache - self.rope_theta = rope_theta - self.attention_dropout = attention_dropout - self.residual_dropout = residual_dropout - self.embedding_dropout = embedding_dropout - - super().__init__( - bos_token_id=bos_token_id, - eos_token_id=eos_token_id, - **kwargs, - ) - if self.architectures is None: - self.architectures = ['Starcoder2ForCausalLM'] From 281e3c5e585e9ad6207bfc141bec9b74591f1df2 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:47:15 +0000 Subject: [PATCH 187/196] final neuron fixes --- vllm/worker/cache_engine.py | 4 ---- vllm/worker/model_runner.py | 5 ----- 2 files changed, 9 deletions(-) diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index 6f9626c9e4c51..307b7b778cb3f 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -38,10 +38,6 
@@ def __init__( self.num_gpu_blocks = cache_config.num_gpu_blocks self.num_cpu_blocks = cache_config.num_cpu_blocks - # Skip initializing CUDA stream and buffer for Neuron backend. - if is_neuron(): - return - if cache_config.cache_dtype == "auto": self.dtype = model_config.dtype else: diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 404224344442d..b8eeb51379f49 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1,5 +1,4 @@ import contextlib -import dataclasses import time from typing import Dict, List, Optional, Tuple, Set @@ -86,10 +85,6 @@ def __init__( self.pin_memory = is_pin_memory_available() self.kv_cache_dtype = kv_cache_dtype - # Set enforce_eager to True for Neuron backend, to avoid capturing graph - if self.device_config.is_neuron: - self.model_config.enforce_eager = True - def load_model(self) -> None: with CudaMemoryProfiler() as m: self.model = get_model(self.model_config, From 2ec44fdbc4dcbc7c9f5cdfe0633601747652876e Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:56:44 +0000 Subject: [PATCH 188/196] small cleanups --- csrc/ops.h | 2 -- csrc/pybind.cpp | 2 -- setup.py | 1 - tests/conftest.py | 3 +-- tests/distributed/test_custom_all_reduce.py | 2 -- tests/entrypoints/test_openai_server.py | 2 -- tests/kernels/test_attention.py | 1 - tests/kernels/test_cache.py | 1 - tests/kernels/test_prefix_prefill.py | 1 - tests/lora/test_layers.py | 1 - tests/lora/test_mixtral.py | 1 + 11 files changed, 2 insertions(+), 15 deletions(-) diff --git a/csrc/ops.h b/csrc/ops.h index 3f111ac488066..d5d6e240da7c4 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -1,5 +1,3 @@ -// This file has been modified by Neural Magic - #pragma once #include diff --git a/csrc/pybind.cpp b/csrc/pybind.cpp index 574a7a2a3de43..a5c6439fd6909 100644 --- a/csrc/pybind.cpp +++ b/csrc/pybind.cpp @@ -1,5 +1,3 @@ -// This file has been modified by Neural Magic - #include "cache.h" #include "cuda_utils.h" #include "ops.h" diff --git a/setup.py b/setup.py index 8231cfeac4e51..e01cf89af6997 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ # UPSTREAM SYNC: noqa is required for passing ruff. 
# This file has been modified by Neural Magic -import contextlib import io import os import re diff --git a/tests/conftest.py b/tests/conftest.py index 3e827f25bca4c..8b4ebb4a35b19 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - import os from typing import List, Optional, Tuple @@ -159,6 +157,7 @@ def hf_runner(): return HfRunner +# UPSTREAM SYNC: needed for nm-automation class HfRunnerNM(HfRunner): def generate_greedy_logprobs_nm( diff --git a/tests/distributed/test_custom_all_reduce.py b/tests/distributed/test_custom_all_reduce.py index 1e45e4e9e2030..9b797f6705628 100644 --- a/tests/distributed/test_custom_all_reduce.py +++ b/tests/distributed/test_custom_all_reduce.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - import random import os diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py index 77c2f54e0daff..fffaf11c94deb 100644 --- a/tests/entrypoints/test_openai_server.py +++ b/tests/entrypoints/test_openai_server.py @@ -1,5 +1,3 @@ -# This file has been modified by Neural Magic - import os import subprocess import time diff --git a/tests/kernels/test_attention.py b/tests/kernels/test_attention.py index c5e00b0f2ff0b..f9a34fb7684b6 100644 --- a/tests/kernels/test_attention.py +++ b/tests/kernels/test_attention.py @@ -1,4 +1,3 @@ -# UPSTREAM SYNC: this file may need attention import random from typing import List, Optional, Tuple diff --git a/tests/kernels/test_cache.py b/tests/kernels/test_cache.py index c5b39e51d55c2..7208927370785 100644 --- a/tests/kernels/test_cache.py +++ b/tests/kernels/test_cache.py @@ -1,4 +1,3 @@ -# UPSTREAM SYNC: this file may need attention import random import pytest diff --git a/tests/kernels/test_prefix_prefill.py b/tests/kernels/test_prefix_prefill.py index 051a79cb0ef44..511d6d4337814 100644 --- a/tests/kernels/test_prefix_prefill.py +++ b/tests/kernels/test_prefix_prefill.py @@ -1,4 +1,3 @@ -# UPSTREAM SYNC: this file may need attention import random import pytest import time diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 7b3a73bd98eff..4e535148ec68b 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -1,4 +1,3 @@ -# UPSTREAM SYNC: this file may need attention import pytest import random from copy import deepcopy diff --git a/tests/lora/test_mixtral.py b/tests/lora/test_mixtral.py index 47172853a1c66..ba47581cb4422 100644 --- a/tests/lora/test_mixtral.py +++ b/tests/lora/test_mixtral.py @@ -29,6 +29,7 @@ def do_sample(llm, lora_path: str, lora_id: int): return generated_texts +# UPSTREAM SYNC: need to skip for nm-automation @pytest.mark.skip(reason="Not enough GPU memory in automation") @pytest.mark.parametrize("tp_size", [4]) def test_mixtral_lora(mixtral_lora_files, tp_size): From a1f583d1cff7e4c824593237a1cda85209f61b34 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 15:59:20 +0000 Subject: [PATCH 189/196] fixed BlockSpaceManager --- vllm/core/block_manager.py | 53 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py index 857ae58d93124..ad9b557fd9a83 100644 --- a/vllm/core/block_manager.py +++ b/vllm/core/block_manager.py @@ -534,3 +534,56 @@ def get_num_free_gpu_blocks(self) -> int: def get_num_free_cpu_blocks(self) -> int: return self.cpu_allocator.get_num_free_blocks() + + def access_all_blocks_in_seq( + self, + seq: Sequence, + access_time: float, + ) -> 
None: + if self.enable_caching: + # Update the last accessed time of all the blocks accessed + # in this step. + block_table = self.block_tables[seq.seq_id] + for block in block_table: + block.last_accessed = access_time + + def compute_full_blocks_in_seq(self, seq: Sequence): + if seq.seq_id not in self.block_tables: + return + max_full_block = seq.get_len() // self.block_size - 1 + block_table = self.block_tables[seq.seq_id] + if max_full_block == -1: + return + for i in reversed(range(max_full_block)): + if block_table[i].computed: + break + block_table[i].computed = True + + def get_all_computed_blocks(self, seq: Sequence) -> List[int]: + if seq.seq_id not in self.block_tables: + return [] + block_table = self.block_tables[seq.seq_id] + # NOTE We exclude the last block to avoid the case where the entire + # prompt is cached. This would cause erroneous behavior in model + # runner. + return [ + b.block_number + for b in takewhile(lambda b: b.computed, block_table[:-1]) + ] + + def get_common_computed_block_ids(self, + seq_group: SequenceGroup) -> List[int]: + # Can return non-empty result only with prefix caching enabled. + if not self.enable_caching: + return [] + + ids_list = [ + self.get_all_computed_blocks(seq) + for seq in iter(seq_group.seqs_dict.values()) + ] + return commonprefix([ids for ids in ids_list if ids != []]) + + def mark_blocks_as_computed(self, seq_group: SequenceGroup): + if self.enable_caching: + for seq in seq_group.seqs_dict.values(): + self.compute_full_blocks_in_seq(seq) From 4265468fc8db288c47b88acc0e7c937834dd1b5c Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 16:29:22 +0000 Subject: [PATCH 190/196] yapf / ruff --- benchmarks/benchmark_prefix_caching.py | 2 +- setup.py | 1 + tests/conftest.py | 1 + vllm/engine/arg_utils.py | 21 ++++++++++++++++----- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index dc18f181bd472..546c61e847839 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -4,7 +4,7 @@ from vllm import LLM from vllm import SamplingParams -PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. 
Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" # noqa: E501 +PROMPT = "You are a helpful assistant in recognizes the content of tables in markdown format. Here is a table as fellows. You need to answer my question about the table.\n# Table\n|Opening|Opening|Sl. No.|Film|Cast|Director|Music Director|Notes|\n|----|----|----|----|----|----|----|----|\n|J A N|9|1|Agni Pushpam|Jayabharathi, Kamalahasan|Jeassy|M. K. Arjunan||\n|J A N|16|2|Priyamvada|Mohan Sharma, Lakshmi, KPAC Lalitha|K. S. Sethumadhavan|V. Dakshinamoorthy||\n|J A N|23|3|Yakshagaanam|Madhu, Sheela|Sheela|M. S. Viswanathan||\n|J A N|30|4|Paalkkadal|Sheela, Sharada|T. K. Prasad|A. T. Ummer||\n|F E B|5|5|Amma|Madhu, Srividya|M. Krishnan Nair|M. K. Arjunan||\n|F E B|13|6|Appooppan|Thikkurissi Sukumaran Nair, Kamal Haasan|P. Bhaskaran|M. S. Baburaj||\n|F E B|20|7|Srishti|Chowalloor Krishnankutty, Ravi Alummoodu|K. T. Muhammad|M. S. Baburaj||\n|F E B|20|8|Vanadevatha|Prem Nazir, Madhubala|Yusufali Kechery|G. Devarajan||\n|F E B|27|9|Samasya|Madhu, Kamalahaasan|K. Thankappan|Shyam||\n|F E B|27|10|Yudhabhoomi|K. P. Ummer, Vidhubala|Crossbelt Mani|R. K. Shekhar||\n|M A R|5|11|Seemantha Puthran|Prem Nazir, Jayabharathi|A. B. Raj|M. K. Arjunan||\n|M A R|12|12|Swapnadanam|Rani Chandra, Dr. Mohandas|K. G. George|Bhaskar Chandavarkar||\n|M A R|19|13|Thulavarsham|Prem Nazir, sreedevi, Sudheer|N. Sankaran Nair|V. Dakshinamoorthy||\n|M A R|20|14|Aruthu|Kaviyoor Ponnamma, Kamalahasan|Ravi|G. Devarajan||\n|M A R|26|15|Swimming Pool|Kamal Haasan, M. G. Soman|J. Sasikumar|M. K. Arjunan||\n\n# Question\nWhat' s the content in the (1,1) cells\n" # noqa: E501 def test_prefix(llm=None, sampling_params=None, prompts=None): diff --git a/setup.py b/setup.py index e01cf89af6997..5e0e5f8f6a82b 100644 --- a/setup.py +++ b/setup.py @@ -343,6 +343,7 @@ def get_extra_requirements() -> dict: "sparsity": _sparsity_deps, } + package_data = { "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json"] } diff --git a/tests/conftest.py b/tests/conftest.py index 8b4ebb4a35b19..83a1221b95191 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -330,6 +330,7 @@ def generate_beam_search( def vllm_runner(): return VllmRunner + # UPSTREAM SYNC: needed for nm-automation class VllmRunnerNm(VllmRunner): diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 088547c746fb0..edacbadaa1f9a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -343,12 +343,23 @@ def create_engine_configs( DeviceConfig, Optional[LoRAConfig]]: device_config = DeviceConfig(self.device) model_config = ModelConfig( - self.model, self.tokenizer, self.tokenizer_mode, - self.trust_remote_code, self.download_dir, self.load_format, - self.dtype, self.seed, self.revision, self.code_revision, - self.tokenizer_revision, self.max_model_len, self.quantization, + self.model, + self.tokenizer, + self.tokenizer_mode, + self.trust_remote_code, + self.download_dir, + self.load_format, + self.dtype, + self.seed, + self.revision, + self.code_revision, + self.tokenizer_revision, + self.max_model_len, + self.quantization, # UPSTREAM SYNC: keep sparsity argument - self.sparsity, self.enforce_eager, self.max_context_len_to_capture, + self.sparsity, + self.enforce_eager, + self.max_context_len_to_capture, self.max_logprobs) cache_config = CacheConfig(self.block_size, self.gpu_memory_utilization, 
From d696d74ecc7fcb4873f9944d9af5778fac0afb9d Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 16:30:31 +0000 Subject: [PATCH 191/196] ruff 2 --- tests/samplers/test_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/samplers/test_sampler.py b/tests/samplers/test_sampler.py index 90de805ccb2bc..26cd7d70ed336 100644 --- a/tests/samplers/test_sampler.py +++ b/tests/samplers/test_sampler.py @@ -1,4 +1,4 @@ -# UPSTREAM SYNC: devices need to be passed around to pass multi-gpu automation tests +# UPSTREAM SYNC: devices need to be passed around to pass multi-gpu automation import random from typing import Tuple, List from unittest.mock import patch From a102e130ab2fa921cb54eb0820d54dc66a3a77d2 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 16:35:27 +0000 Subject: [PATCH 192/196] format --- vllm/model_executor/weight_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py index 1c79ee5e08268..2ae517d1a156d 100644 --- a/vllm/model_executor/weight_utils.py +++ b/vllm/model_executor/weight_utils.py @@ -295,7 +295,7 @@ def convert_pyslice_to_tensor(x: Any) -> torch.Tensor: return x -# UPSTEAM SYNC: Parameter needed for LazyCompressedParameter +# UPSTREAM SYNC: Parameter needed for LazyCompressedParameter def default_weight_loader(param: torch.nn.Parameter, loaded_weight: torch.Tensor) -> None: """Default weight loader.""" From 476798eacab14dbf880721dea5c9d1ecd4120480 Mon Sep 17 00:00:00 2001 From: Andrew Feldman Date: Sun, 24 Mar 2024 17:46:16 +0000 Subject: [PATCH 193/196] fixed basic correctness failure by running with --forked --- .github/scripts/run-tests | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/scripts/run-tests b/.github/scripts/run-tests index 83073663c2a89..cc683422dc302 100755 --- a/.github/scripts/run-tests +++ b/.github/scripts/run-tests @@ -100,6 +100,8 @@ do coverage run --data-file=.coverage-$(basename ${TEST}) -m pytest --forked --junitxml=${RESULT_XML} ${TEST} || LOCAL_SUCCESS=$? elif [[ "${TEST}" == *"models_logprobs"* ]]; then coverage run --data-file=.coverage-$(basename ${TEST}) -m pytest --forked --junitxml=${RESULT_XML} ${TEST} || LOCAL_SUCCESS=$? + elif [[ "${TEST}" == *"basic_correctness"* ]]; then + coverage run --data-file=.coverage-$(basename ${TEST}) -m pytest --forked --junitxml=${RESULT_XML} ${TEST} || LOCAL_SUCCESS=$? else coverage run --data-file=.coverage-$(basename ${TEST}) -m pytest --junitxml=${RESULT_XML} ${TEST} || LOCAL_SUCCESS=$? 
fi From e973135ad8d893979daba5003001d3b4fd6b8f03 Mon Sep 17 00:00:00 2001 From: Robert Shaw Date: Mon, 25 Mar 2024 01:35:02 +0000 Subject: [PATCH 194/196] fixed tests for nightly --- tests/lora/test_gemma.py | 3 +++ tests/models/test_models_logprobs.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/lora/test_gemma.py b/tests/lora/test_gemma.py index 0082c6e74e888..c790c76507de2 100644 --- a/tests/lora/test_gemma.py +++ b/tests/lora/test_gemma.py @@ -1,3 +1,4 @@ +import pytest import vllm from vllm.lora.request import LoRARequest @@ -26,6 +27,8 @@ def do_sample(llm, lora_path: str, lora_id: int) -> str: return generated_texts +# UPSTREAM SYNC: skip this test in nm-automation +@pytest.mark.skip("Flaky test in NM automation") def test_gemma_lora(gemma_lora_files): llm = vllm.LLM(MODEL_PATH, max_model_len=1024, diff --git a/tests/models/test_models_logprobs.py b/tests/models/test_models_logprobs.py index c2ab3c36e69d5..9818a9db62f5a 100644 --- a/tests/models/test_models_logprobs.py +++ b/tests/models/test_models_logprobs.py @@ -21,7 +21,7 @@ # "mosaicml/mpt-7b", # vLLM upstream bug in mpt right now # noqa "microsoft/phi-2", "stabilityai/stablelm-3b-4e1t", - "allenai/OLMo-1B", + # "allenai/OLMo-1B", # dependencies are not installed right now # noqa "bigcode/starcoder2-3b", "Qwen/Qwen1.5-0.5B", ] From 4ce1f873f5e8276cf28d11a572436315a16a9d20 Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Tue, 26 Mar 2024 02:46:38 +0000 Subject: [PATCH 195/196] add nvcc_threads to gha --- .github/actions/nm-build-vllm/action.yml | 3 --- .github/actions/nm-set-env/action.yml | 5 +++++ .github/workflows/build-test.yml | 9 +++++++++ .github/workflows/build-whl.yml | 9 +++++++++ .github/workflows/gen-whl.yml | 1 + .github/workflows/nightly.yml | 5 +++++ .github/workflows/nm-benchmark.yml | 9 +++++++++ .github/workflows/nm-lm-eval-accuracy.yml | 9 +++++++++ .github/workflows/remote-push.yml | 2 ++ 9 files changed, 49 insertions(+), 3 deletions(-) diff --git a/.github/actions/nm-build-vllm/action.yml b/.github/actions/nm-build-vllm/action.yml index f068c7a62378d..0299a401b5c09 100644 --- a/.github/actions/nm-build-vllm/action.yml +++ b/.github/actions/nm-build-vllm/action.yml @@ -1,9 +1,6 @@ name: build nm-vllm description: 'build nm-vllm' inputs: - Gi_per_thread: - description: 'requested GiB to reserve per thread' - required: true python: description: 'python version, e.g. 
3.10.12' required: true diff --git a/.github/actions/nm-set-env/action.yml b/.github/actions/nm-set-env/action.yml index 75043e2c34306..caf9ad501ce8a 100644 --- a/.github/actions/nm-set-env/action.yml +++ b/.github/actions/nm-set-env/action.yml @@ -7,6 +7,10 @@ inputs: Gi_per_thread: description: 'requested GiB to reserve per thread' required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true runs: using: composite steps: @@ -16,6 +20,7 @@ runs: echo "HF_HOME=/EFS/hf_home" >> $GITHUB_ENV NUM_THREADS=$(./.github/scripts/determine-threading -G ${{ inputs.Gi_per_thread }}) echo "MAX_JOBS=${NUM_THREADS}" >> $GITHUB_ENV + echo "NVCC_THREADS=${{ inputs.nvcc_threads }}" >> $GITHUB_ENV echo "VLLM_INSTALL_PUNICA_KERNELS=1" >> $GITHUB_ENV echo "NCCL_IGNORE_DISABLED_P2P=1" >> $GITHUB_ENV echo "PYENV_ROOT=/usr/local/apps/pyenv" >> $GITHUB_ENV diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 0517bfeee42e5..156a85c4225d2 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -19,6 +19,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -47,6 +51,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -79,6 +87,7 @@ jobs: with: hf_token: ${{ secrets.NM_HF_TOKEN }} Gi_per_thread: ${{ inputs.Gi_per_thread }} + nvcc_threads: ${{ inputs.nvcc_threads }} - name: set python id: set_python diff --git a/.github/workflows/build-whl.yml b/.github/workflows/build-whl.yml index dfbffac6e177e..cf3b5a40b2744 100644 --- a/.github/workflows/build-whl.yml +++ b/.github/workflows/build-whl.yml @@ -19,6 +19,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -43,6 +47,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 
3.10.12" type: string @@ -76,6 +84,7 @@ jobs: with: hf_token: ${{ secrets.NM_HF_TOKEN }} Gi_per_thread: ${{ inputs.Gi_per_thread }} + nvcc_threads: ${{ inputs.nvcc_threads }} - name: set python id: set_python diff --git a/.github/workflows/gen-whl.yml b/.github/workflows/gen-whl.yml index fbe3e50883cb0..7bb8f6ba0e3f8 100644 --- a/.github/workflows/gen-whl.yml +++ b/.github/workflows/gen-whl.yml @@ -20,5 +20,6 @@ jobs: timeout: 30 gitref: ${{ inputs.gitref }} Gi_per_thread: 4 + nvcc_threads: 8 python: ${{ matrix.python }} secrets: inherit diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index b6b5d0be11af2..e4f94b812a282 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -24,6 +24,7 @@ jobs: timeout: 240 gitref: ${{ github.ref }} Gi_per_thread: 4 + nvcc_threads: 8 python: 3.10.12 test_skip_list: secrets: inherit @@ -35,6 +36,7 @@ jobs: timeout: 300 gitref: ${{ github.ref }} Gi_per_thread: 12 + nvcc_threads: 1 python: 3.11.4 test_skip_list: secrets: inherit @@ -48,6 +50,7 @@ jobs: # timeout: 480 # gitref: '${{ github.ref }}' # Gi_per_thread: 4 + # nvcc_threads: 8 # python: "3.10.12" # # Always push if it is a scheduled job # push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" @@ -62,6 +65,7 @@ jobs: timeout: 720 gitref: '${{ github.ref }}' Gi_per_thread: 12 + nvcc_threads: 1 python: "3.10.12" # Always push if it is a scheduled job push_benchmark_results_to_gh_pages: "${{ github.event_name == 'schedule' || inputs.push_benchmark_results_to_gh_pages }}" @@ -75,5 +79,6 @@ jobs: timeout: 60 gitref: '${{ github.ref }}' Gi_per_thread: 12 + nvcc_threads: 1 python: "3.10.12" secrets: inherit diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml index 1df48156f3ace..d82ba78c5234b 100644 --- a/.github/workflows/nm-benchmark.yml +++ b/.github/workflows/nm-benchmark.yml @@ -23,6 +23,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -55,6 +59,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -89,6 +97,7 @@ jobs: with: hf_token: ${{ secrets.NM_HF_TOKEN }} Gi_per_thread: ${{ inputs.Gi_per_thread }} + nvcc_threads: ${{ inputs.nvcc_threads }} - name: set python id: set_python diff --git a/.github/workflows/nm-lm-eval-accuracy.yml b/.github/workflows/nm-lm-eval-accuracy.yml index 48ac2b8217289..af95cdd603bed 100644 --- a/.github/workflows/nm-lm-eval-accuracy.yml +++ b/.github/workflows/nm-lm-eval-accuracy.yml @@ -19,6 +19,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 3.10.12" type: string @@ -43,6 +47,10 @@ on: description: 'requested GiB to reserve per thread' type: string required: true + nvcc_threads: + description: "number of threads nvcc build threads" + type: string + required: true python: description: "python version, e.g. 
3.10.12" type: string @@ -68,6 +76,7 @@ jobs: with: hf_token: ${{ secrets.NM_HF_TOKEN }} Gi_per_thread: ${{ inputs.Gi_per_thread }} + nvcc_threads: ${{ inputs.nvcc_threads }} - name: set python id: set_python diff --git a/.github/workflows/remote-push.yml b/.github/workflows/remote-push.yml index f8f27758c1d51..8f33ee864beab 100644 --- a/.github/workflows/remote-push.yml +++ b/.github/workflows/remote-push.yml @@ -24,6 +24,7 @@ jobs: timeout: 240 gitref: '${{ github.ref }}' Gi_per_thread: 4 + nvcc_threads: 8 python: ${{ matrix.python }} test_skip_list: neuralmagic/tests/skip-for-remote-push.txt secrets: inherit @@ -37,6 +38,7 @@ jobs: # timeout: 60 # gitref: '${{ github.ref }}' # Gi_per_thread: 12 + # nvcc_threads: 1 # python: "3.10.12" # push_benchmark_results_to_gh_pages: "false" # secrets: inherit From 8ddab6adc111447e1173c4cc59a2451274b15f9a Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Tue, 26 Mar 2024 04:55:02 +0000 Subject: [PATCH 196/196] Remove Gi_per_thread arg to nm-build-vllm action --- .github/workflows/build-test.yml | 1 - .github/workflows/build-whl.yml | 1 - .github/workflows/nm-benchmark.yml | 1 - .github/workflows/nm-lm-eval-accuracy.yml | 1 - 4 files changed, 4 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 156a85c4225d2..2deaffb92db43 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -120,7 +120,6 @@ jobs: id: build uses: ./.github/actions/nm-build-vllm/ with: - Gi_per_thread: ${{ inputs.Gi_per_thread }} python: ${{ inputs.python }} venv: TEST pypi: ${{ secrets.NM_PRIVATE_PYPI_LOCATION }} diff --git a/.github/workflows/build-whl.yml b/.github/workflows/build-whl.yml index cf3b5a40b2744..d6c0938beb956 100644 --- a/.github/workflows/build-whl.yml +++ b/.github/workflows/build-whl.yml @@ -110,7 +110,6 @@ jobs: id: build uses: ./.github/actions/nm-build-vllm/ with: - Gi_per_thread: ${{ inputs.Gi_per_thread }} python: ${{ inputs.python }} venv: ${{ env.VENV_BUILD_BASE }} pypi: ${{ secrets.NM_PRIVATE_PYPI_LOCATION }} diff --git a/.github/workflows/nm-benchmark.yml b/.github/workflows/nm-benchmark.yml index d82ba78c5234b..e73a34eea94cc 100644 --- a/.github/workflows/nm-benchmark.yml +++ b/.github/workflows/nm-benchmark.yml @@ -116,7 +116,6 @@ jobs: id: build uses: ./.github/actions/nm-build-vllm/ with: - Gi_per_thread: ${{ inputs.Gi_per_thread }} python: ${{ inputs.python }} venv: TEST pypi: ${{ secrets.NM_PRIVATE_PYPI_LOCATION }} diff --git a/.github/workflows/nm-lm-eval-accuracy.yml b/.github/workflows/nm-lm-eval-accuracy.yml index af95cdd603bed..88d8d436e8d14 100644 --- a/.github/workflows/nm-lm-eval-accuracy.yml +++ b/.github/workflows/nm-lm-eval-accuracy.yml @@ -95,7 +95,6 @@ jobs: id: build uses: ./.github/actions/nm-build-vllm/ with: - Gi_per_thread: ${{ inputs.Gi_per_thread }} python: ${{ inputs.python }} venv: TEST pypi: ${{ secrets.NM_PRIVATE_PYPI_LOCATION }}
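
A note on the --forked runs added in PATCH 193 and the skip added in PATCH 194: pytest's --forked mode (from the pytest-forked plugin) runs each test in its own child process, which is presumably what keeps state left behind by one GPU test, such as a stale CUDA context, from breaking the next one. As a minimal, hypothetical sketch (it assumes pytest-forked is installed; the test name is invented for illustration and is not part of this patch series), the same isolation the run-tests script requests on the command line can also be requested per test with the plugin's marker:

import pytest


@pytest.mark.forked  # same effect as passing --forked, but scoped to this one test
def test_gpu_state_is_isolated():
    # Whatever this test allocates lives in a forked child process, so a crash
    # or leaked device state cannot poison the tests that run after it.
    assert 1 + 1 == 2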
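
A note on the nvcc_threads plumbing in PATCHES 195 and 196: the nm-set-env action already derives MAX_JOBS from the Gi_per_thread memory budget, and the new NVCC_THREADS input is exported next to it, so the effective build load is roughly MAX_JOBS parallel compile jobs times NVCC_THREADS threads per nvcc invocation; the workflow values above (for example 4 GiB with 8 nvcc threads for build-test, 12 GiB with 1 nvcc thread for the benchmark and accuracy jobs) keep that product in check. The sketch below is illustrative only and is not the repository's determine-threading script or setup.py; the function name, the 4.0 GiB default, and the --threads forwarding are assumptions about how such variables are typically consumed.

import os


def pick_max_jobs(gib_per_job: float = 4.0) -> int:
    """Pick a job count so parallel nvcc invocations fit in RAM (Linux-only sysconf keys)."""
    total_gib = os.sysconf("SC_PAGE_SIZE") * os.sysconf("SC_PHYS_PAGES") / (1 << 30)
    by_memory = max(1, int(total_gib // gib_per_job))
    return min(os.cpu_count() or 1, by_memory)


max_jobs = int(os.getenv("MAX_JOBS", pick_max_jobs()))
nvcc_threads = int(os.getenv("NVCC_THREADS", "1"))
# A build script could cap its ninja/make workers at max_jobs and append
# ["--threads", str(nvcc_threads)] to its nvcc flags; peak memory then scales
# with max_jobs * nvcc_threads rather than with the raw CPU count.
print(f"MAX_JOBS={max_jobs} NVCC_THREADS={nvcc_threads}")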