Name: Tổng quan
Author: Kh4ngTh41

Tổng quan | Skills Pool

import gymnasium as gym
import numpy as np
from gymnasium import spaces

class SFLNodeEnv(gym.Env):
    def __init__(self, num_nodes=5, num_layers=10):
        super().__init__()
        self.num_nodes = num_nodes
        self.num_layers = num_layers
        
        # State: [CPU, RAM, Energy, Bandwidth, Loss, Loss_Std, Neighbor_Availability]
        self.observation_space = spaces.Box(
            low=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
            high=np.array([100.0, 100.0, 100.0, 1000.0, 10.0, 5.0, 1.0]),
            dtype=np.float32
        )

        # Action: [cut_layer, batch_size, H, target_node]
        self.action_space = spaces.MultiDiscrete([
            num_layers,      # cut_layer: vị trí cắt mô hình (0 to num_layers-1)
            8,               # batch_size: index trong [4, 8, 16, 32, 64, 128, 256, 512]
            5,               # H: số lượng local epochs (1, 2, 3, 5, 10)
            num_nodes        # target_node: node đích cho split learning
        ])
        
        self.batch_sizes = [4, 8, 16, 32, 64, 128, 256, 512]
        self.local_epochs = [1, 2, 3, 5, 10]

    def step(self, action):
        cut_layer = action[0]
        batch_size = self.batch_sizes[action[1]]
        H = self.local_epochs[action[2]]
        target_node = action[3]
        
        # 1. Tính toán chi phí tài nguyên
        resource_cost = self._compute_resource_cost(cut_layer, batch_size, H, target_node)
        
        # 2. Mô phỏng training step
        performance_gain = self._simulate_training(cut_layer, batch_size, H, target_node)
        
        # 3. Tính Shapley value contribution
        shapley_weight = self._compute_shapley_contribution(target_node, performance_gain)
        
        # 4. Reward theo Eq. 7: -cost + shapley_weighted_gain
        reward = -resource_cost + shapley_weight * performance_gain
        
        # 5. Cập nhật state
        self._update_state(cut_layer, batch_size, H, target_node)
        
        # 6. Kiểm tra điều kiện kết thúc
        terminated = self._check_termination()
        truncated = self.current_step >= self.max_steps
        
        info = {
            'resource_cost': resource_cost,
            'performance_gain': performance_gain,
            'shapley_weight': shapley_weight,
            'cut_layer': cut_layer,
            'target_node': target_node
        }
        
        return self.state, reward, terminated, truncated, info

    def _compute_resource_cost(self, cut_layer, batch_size, H, target_node):
        # Computation cost: phụ thuộc vào cut_layer và batch_size
        comp_cost = (cut_layer / self.num_layers) * (batch_size / 512.0) * 0.3
        
        # Communication cost: kích thước activation tại cut point
        # Giả sử activation size giảm theo depth
        activation_size = (1.0 - cut_layer / self.num_layers) * batch_size * 1024  # KB
        comm_cost = activation_size / self.state[3]  # chia cho bandwidth
        
        # Energy cost: tỷ lệ với computation và communication
        energy_cost = (comp_cost + comm_cost) * 0.1 / (self.state[2] / 100.0)
        
        # Penalty nếu target_node quá tải (neighbor availability thấp)
        availability_penalty = (1.0 - self.state[6]) * 0.2
        
        return comp_cost + comm_cost + energy_cost + availability_penalty

    def _compute_shapley_contribution(self, target_node, performance_gain):
        # Shapley value đo marginal contribution của target_node
        # Trong thực tế cần Monte Carlo approximation, đây là simplified version
        
        # Lấy lịch sử performance của các coalition có/không có target_node
        coalition_with = self.coalition_history.get((target_node, True), [])
        coalition_without = self.coalition_history.get((target_node, False), [])
        
        if len(coalition_with) > 0 and len(coalition_without) > 0:
            # Marginal contribution = avg performance with node - avg without node
            marginal = np.mean(coalition_with) - np.mean(coalition_without)
            # Normalize to [0, 1] range
            shapley_weight = np.clip(marginal / (np.std(coalition_with) + 1e-6), 0, 1)
        else:
            # Cold start: uniform weight
            shapley_weight = 1.0 / self.num_nodes
        
        return shapley_weight

    def _simulate_training(self, cut_layer, batch_size, H, target_node):
        # Mô phỏng đơn giản: performance gain phụ thuộc vào:
        # 1. Data heterogeneity giữa node hiện tại và target
        # 2. Tính ổn định của loss (Loss_Std)
        # 3. Số local epochs H
        
        # Giả sử có data distribution cho mỗi node (IID vs Non-IID)
        data_similarity = self._compute_data_similarity(target_node)
        
        # Performance gain cao hơn nếu data tương đồng và loss ổn định
        base_gain = data_similarity * (1.0 / (self.state[5] + 1.0))  # Loss_Std ở state[5]
        
        # Điều chỉnh theo H: nhiều local epochs = học tốt hơn nhưng risk overfitting
        epoch_factor = np.log(H + 1) / np.log(11)  # normalize to [0, 1]
        
        performance_gain = base_gain * epoch_factor * np.random.uniform(0.8, 1.2)
        
        # Cập nhật coalition history cho Shapley computation
        self._update_coalition_history(target_node, performance_gain)
        
        return performance_gain

    def _update_state(self, cut_layer, batch_size, H, target_node):
        # CPU và RAM giảm theo computation cost
        comp_load = (cut_layer / self.num_layers) * (batch_size / 512.0)
        self.state[0] = max(0, self.state[0] - comp_load * 10)  # CPU
        self.state[1] = max(0, self.state[1] - comp_load * 15)  # RAM
        
        # Energy tiêu hao
        energy_consumed = comp_load * 5 + (batch_size / 512.0) * 2
        self.state[2] = max(0, self.state[2] - energy_consumed)
        
        # Bandwidth fluctuation (mô phỏng network dynamics)
        self.state[3] = np.clip(self.state[3] + np.random.normal(0, 50), 10, 1000)
        
        # Loss cải thiện dần (simplified convergence)
        self.state[4] = max(0.1, self.state[4] * 0.95)
        
        # Loss_Std giảm khi training ổn định
        self.state[5] = max(0.1, self.state[5] * 0.98)
        
        # Neighbor availability thay đổi theo network churn
        self.state[6] = np.clip(self.state[6] + np.random.normal(0, 0.1), 0, 1)
        
        self.current_step += 1

    def _check_termination(self):
        # Kết thúc nếu:
        # 1. Loss đủ thấp (convergence)
        if self.state[4] < 0.5:
            return True
        # 2. Hết năng lượng
        if self.state[2] < 5.0:
            return True
        # 3. Không còn tài nguyên
        if self.state[0] < 5.0 or self.state[1] < 5.0:
            return True
        return False

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        
        # Khởi tạo state ngẫu nhiên
        self.state = np.array([
            np.random.uniform(60, 100),   # CPU
            np.random.uniform(60, 100),   # RAM
            np.random.uniform(80, 100),   # Energy
            np.random.uniform(100, 800),  # Bandwidth
            np.random.uniform(5, 10),     # Loss
            np.random.uniform(1, 3),      # Loss_Std
            np.random.uniform(0.5, 1.0)   # Neighbor_Availability
        ], dtype=np.float32)
        
        self.current_step = 0
        self.coalition_history = {}
        
        return self.state, {}

# Ví dụ sử dụng với Stable-Baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

env = SFLNodeEnv(num_nodes=5, num_layers=10)
check_env(env)  # Validate environment

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=50000)

# Evaluation
obs, info = env.reset()
for _ in range(100):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"Reward: {reward:.3f}, Cut: {action[0]}, Target: {action[3]}")
    if terminated or truncated:
        obs, info = env.reset()

# Kiểm tra reward shaping
def validate_reward_function(env, num_episodes=10):
    rewards = []
    for _ in range(num_episodes):
        obs, _ = env.reset()
        episode_reward = 0
        for _ in range(100):
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            if terminated or truncated:
                break
        rewards.append(episode_reward)
    
    print(f"Mean reward: {np.mean(rewards):.2f} ± {np.std(rewards):.2f}")
    print(f"Min/Max: {np.min(rewards):.2f} / {np.max(rewards):.2f}")
    return rewards

# Kiểm tra action distribution
def analyze_action_distribution(env, model, num_steps=1000):
    obs, _ = env.reset()
    actions = []
    for _ in range(num_steps):
        action, _ = model.predict(obs, deterministic=False)
        actions.append(action)
        obs, _, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            obs, _ = env.reset()
    
    actions = np.array(actions)
    print("Cut layer distribution:", np.bincount(actions[:, 0]))
    print("Batch size distribution:", np.bincount(actions[:, 1]))
    print("Target node distribution:", np.bincount(actions[:, 3]))

Tổng quan

Kiến trúc môi trường

Định nghĩa không gian

State Space

Tổng quan

Kiến trúc môi trường

Định nghĩa không gian

State Space

Action Space

Hàm step() và Reward Function

Cấu trúc step()

Chi phí tài nguyên

Shapley Value Contribution

Mô phỏng Training

State Update và Termination

Tích hợp với Training Loop

Các điểm cần lưu ý

Shapley Value Approximation

Non-IID Data Handling

Resource Constraints

Multi-Agent Extension

Debugging và Validation

Continuous Learning V2

Continuous Learning V2

Continuous Learning V2

Continuous Learning

Continuous Learning

Pytorch Patterns