Name: Support New Model
Author: InternLM

搜索技能.../

Support New Model | Skills Pool

File	Purpose
`lmdeploy/pytorch/models/<model>.py`	Attention, MLP, DecoderLayer, Model, ForCausalLM
`lmdeploy/pytorch/models/module_map.py`	HF class name → LMDeploy class path mapping
`lmdeploy/pytorch/configurations/<model>.py`	Config builder — only needed for non-standard/nested HF configs
`lmdeploy/vl/model/<model>.py`	VLM: image/video preprocessing (VLM only)
`lmdeploy/vl/model/base.py`	`VisionModel` base class + `VISION_MODELS` registry
`lmdeploy/archs.py`	VLM: arch name → task mapping (VLM only)
`lmdeploy/lite/apis/calibrate.py`	Quantization: layer/norm/head mappings (optional)
`lmdeploy/lite/quantization/awq.py`	Quantization: AWQ scale mappings (optional)

import torch
import torch.nn as nn
from lmdeploy.pytorch.model_inputs import StepContext, StepContextManager
from lmdeploy.pytorch.nn import (ApplyRotaryEmb, Attention, RMSNorm, SiluAndMul,
                                  build_rotary_embedding_from_config)
from lmdeploy.pytorch.nn.linear import (build_down_linear, build_gateup_linear,
                                         build_o_proj, build_qkv_proj)
from lmdeploy.pytorch.weight_loader.model_weight_loader import load_weight
from .patch import add_prefix
from .utils.cudagraph import CudaGraphMixin
from .utils.model import DeployModelMixinV1, build_embedding

class MyModelAttention(nn.Module):
    """Attention layer template: packed QKV projection, rotary embedding,
    attention kernel wrapper and output projection.

    Args:
        config: HF model config. Reads ``hidden_size``,
            ``num_attention_heads``, ``num_key_value_heads`` and, when
            present, ``head_dim``.
        dtype: Forwarded to the linear-layer builders.
        device: Forwarded to the linear-layer builders.
        prefix: Parameter-name prefix combined with each submodule name via
            ``add_prefix``.
    """

    def __init__(self, config, dtype=None, device=None, prefix=''):
        super().__init__()
        hidden_size = config.hidden_size
        num_heads = config.num_attention_heads
        num_kv_heads = config.num_key_value_heads
        # Some HF configs carry an explicit ``head_dim`` that is NOT equal to
        # hidden_size // num_attention_heads; prefer it when it is set and
        # fall back to the derived value when it is missing or None.
        head_dim = getattr(config, 'head_dim', None) or hidden_size // num_heads

        self.qkv_proj = build_qkv_proj(
            hidden_size,
            num_q_heads=num_heads,
            num_kv_heads=num_kv_heads,
            head_size=head_dim,
            bias=False,
            dtype=dtype, device=device, prefix=add_prefix('qkv_proj', prefix))
        self.apply_rotary_pos_emb = ApplyRotaryEmb()
        self.attn_fwd = Attention(
            num_heads,
            head_dim,
            num_kv_heads=num_kv_heads)
        self.o_proj = build_o_proj(
            num_heads,
            head_dim,
            hidden_size,
            bias=False,
            dtype=dtype, device=device, prefix=add_prefix('o_proj', prefix))

    def forward(self, hidden_states, rotary_pos_emb, past_key_value, attn_metadata):
        """Template forward: only the packed QKV projection is written out;
        the remaining steps are left as a placeholder to fill in."""
        qkv_states = self.qkv_proj(hidden_states)
        # split q, k, v; apply rotary; call attn_fwd; project output
        ...

class MyModelMLP(nn.Module):
    """Feed-forward template: fused gate/up projection, ``SiluAndMul``
    activation, then the down projection.

    Args:
        config: HF model config; reads ``hidden_size`` and
            ``intermediate_size``.
        dtype: Forwarded to the linear-layer builders.
        device: Forwarded to the linear-layer builders.
        prefix: Parameter-name prefix combined with each submodule name via
            ``add_prefix``.
    """

    def __init__(self, config, dtype=None, device=None, prefix=''):
        super().__init__()
        hidden_dim = config.hidden_size
        inter_dim = config.intermediate_size

        self.gate_up_proj = build_gateup_linear(
            hidden_dim,
            inter_dim,
            bias=False,
            dtype=dtype,
            device=device,
            prefix=add_prefix('gate_up_proj', prefix),
        )
        self.down_proj = build_down_linear(
            inter_dim,
            hidden_dim,
            bias=False,
            dtype=dtype,
            device=device,
            prefix=add_prefix('down_proj', prefix),
        )
        self.act_fn = SiluAndMul()

    def forward(self, x):
        """Apply gate/up projection, activation and down projection to ``x``."""
        gate_up = self.gate_up_proj(x)
        activated = self.act_fn(gate_up)
        return self.down_proj(activated)

class MyModelForCausalLM(nn.Module, DeployModelMixinV1, CudaGraphMixin):
    """Top-level causal-LM template: wraps the backbone model and an
    ``lm_head`` linear layer that maps hidden states to vocabulary logits."""

    # Maps packed param name → list of original HF param suffixes
    packed_modules_mapping = {
        'qkv_proj': [
            'q_proj',
            'k_proj',
            'v_proj',
        ],
        'gate_up_proj': [
            'gate_proj',
            'up_proj',
        ],
    }

    def __init__(self, config, ctx_mgr=None, prefix='', **kwargs):
        super().__init__()
        # `...` is a tutorial placeholder for the remaining constructor
        # arguments of the backbone model.
        self.model = MyModelModel(config, ...)
        vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, vocab_size, bias=False)
        self.ctx_mgr = ctx_mgr

    def get_input_embeddings(self):
        """Return the backbone's token-embedding module."""
        backbone = self.model
        return backbone.embed_tokens

    def forward(self, input_ids, inputs_embeds, past_key_values, attn_metadata, **kwargs):
        """Run the backbone and return its hidden states; logits are
        produced separately by ``get_logits``."""
        return self.model(input_ids, inputs_embeds, past_key_values, attn_metadata)

    def get_logits(self, hidden_states):
        """Project ``hidden_states`` through ``lm_head``."""
        logits = self.lm_head(hidden_states)
        return logits

    # prepare_inputs_for_generation and load_weights: copy from qwen3.py,
    # update stacked_params_mapping to match this model's HF weight names.

# Register the new model: the key is the HF class name, the value is the
# import path of the LMDeploy class that implements it.
MODULE_MAP.update({
    'MyModelForCausalLM': f'{LMDEPLOY_PYTORCH_MODEL_PATH}.my_model.MyModelForCausalLM',
})

from .builder import AutoModelConfigBuilder, DefaultModelConfigBuilder

class MyModelConfigBuilder(AutoModelConfigBuilder):
    """Config builder template whose ``condition`` matches HF configs with
    ``model_type == 'my_model'``."""

    @classmethod
    def condition(cls, hf_config):
        """Return True when ``hf_config`` belongs to this model."""
        # Must match model_type from config.json exactly
        model_type = hf_config.model_type
        return model_type == 'my_model'

    @classmethod
    def build(cls, hf_config, model_path=None, **kwargs):
        """Build the config via ``DefaultModelConfigBuilder`` and attach the
        full HF config to the result."""
        # Extract the text config if nested; patch fields if needed
        model_config = DefaultModelConfigBuilder.build(hf_config, model_path, **kwargs)
        # keep full config for VLM layers
        model_config.hf_config = hf_config
        return model_config

from lmdeploy.vl.model.base import VISION_MODELS, VisionModel

@VISION_MODELS.register_module()
class MyModelVLModel(VisionModel):
    """Vision-language preprocessor template registered in ``VISION_MODELS``."""

    # Must match hf_config.architectures exactly (can be a list for variants)
    _arch = ['MyModelForConditionalGeneration']

    def build_preprocessor(self):
        """Load the processor from ``self.model_path`` and record the image
        placeholder token together with its token id."""
        from transformers import AutoProcessor

        self.processor = AutoProcessor.from_pretrained(self.model_path)

        # model-specific placeholder token
        self.image_token = '<image>'

        # image_token_id is the token ID of the image placeholder
        # (used by the engine to know where to inject image features)
        text_tokenizer = self.processor.tokenizer
        self.image_token_id = text_tokenizer.convert_tokens_to_ids(self.image_token)

    # preprocess and to_pytorch: copy from vl/model/qwen3.py and adapt
    # image token handling (image_token, image_token_id, image_tokens count).

from .my_model import MyModelVLModel  # noqa F401

# lmdeploy/archs.py — inside check_vl_llm()
# Excerpt only: `...` stands in for the entries already present in the set.
supported_archs = set([
    ...
    'MyModelForConditionalGeneration',  # add this line
])

# Run the PyTorch chat entry point against the new model; replace <model_path>.
python -m lmdeploy.pytorch.chat <model_path> --backend pytorch

# VLM check: build a pipeline for the model and send one request made of a
# (text prompt, image path) tuple, then print the `text` field of the result.
from lmdeploy import pipeline
pipe = pipeline('<model_path>')  # replace with the checkpoint path
result = pipe(('Describe this image.', 'path/to/image.jpg'))
print(result.text)

# Run the test suites from the repository root.
pytest tests/test_lmdeploy/test_vl/     # VLM tests
pytest tests/test_lmdeploy/             # all unit tests

# Re-run with LMDEPLOY_LOG_LEVEL=DEBUG, merge stderr into stdout, and keep
# only the output lines that mention "load", "weight" or "miss".
LMDEPLOY_LOG_LEVEL=DEBUG python -m lmdeploy.pytorch.chat <model_path> --backend pytorch 2>&1 | grep -E "load|weight|miss"

Support New Model

Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)

Before Writing Any Code

Support New Model

Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)

Before Writing Any Code

Key Files Quick Reference

Step-by-Step: LLM (PyTorch Backend)

Step 1 — Create the PyTorch model file

Step 2 — Register in `module_map.py`

Step 3 — Add config builder (if needed)

Step 4 — Add quantization mappings (optional)

Step-by-Step: VLM (additional steps)

Step 5 — Create the VL preprocessor

Step 6 — Register VLM arch in `archs.py`

Checklist

Common Pitfalls

Verification

PyTorch Patterns

Regex vs LLM Structured Text

Effect

Flags

WPF to WinUI 3 Migration Skill

At Dispatch V2

Support New Model

Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)

Before Writing Any Code

Support New Model

Tutorial: Adding a New Model to LMDeploy (PyTorch Backend)

Before Writing Any Code

Key Files Quick Reference

Step-by-Step: LLM (PyTorch Backend)

Step 1 — Create the PyTorch model file

Step 2 — Register in module_map.py

Step 3 — Add config builder (if needed)

Step 4 — Add quantization mappings (optional)

Step-by-Step: VLM (additional steps)

Step 5 — Create the VL preprocessor

Step 6 — Register VLM arch in archs.py

Checklist

Common Pitfalls

Verification

PyTorch Patterns

Regex vs LLM Structured Text

Effect

Flags

WPF to WinUI 3 Migration Skill

At Dispatch V2

Step 2 — Register in `module_map.py`

Step 6 — Register VLM arch in `archs.py`