
WIP: Display of module tree

Open · antimora opened this pull request 1 year ago · 1 comment

Pull Request Template

Checklist

  • [ ] Confirmed that the run-checks all script has been executed.
  • [ ] Made sure the book is up to date with changes in this PR.

Related Issues/PRs

fixes #1357

Changes

Work in progress

Testing

Work in progress

antimora · May 13 '24 05:05

I'll be on a two-week break and will finish this off after May 28th. Any feedback is welcome in the meantime.

antimora · May 13 '24 16:05

Progress: display output for the Whisper model:

Whisper {
  encoder: Encoder {
    conv1: Conv1d {stride: 1, kernel_size: 3, dilation: 1, groups: 1, padding: Explicit(1)}
    conv2: Conv1d {stride: 2, kernel_size: 3, dilation: 1, groups: 1, padding: Explicit(1)}
    positional_embedding: Param
    blocks: Vec<0..4> {
      0: ResidualAttentionBlock {
        attn: MultiHeadAttention {
          n_head: 6
          n_state: 384
          query: Linear {d_input: 384, d_output: 384, bias: true}
          key: Linear {d_input: 384, d_output: 384, bias: false}
          value: Linear {d_input: 384, d_output: 384, bias: true}
          out: Linear {d_input: 384, d_output: 384, bias: true}
        }
        attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
        mlp: Mlp {
          n_state: 384
          n_mlp: 6
          ln1: Linear {d_input: 384, d_output: 6, bias: true}
          gelu: Gelu
          ln2: Linear {d_input: 384, d_output: 6, bias: true}
        }
        mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
      }
      1: ResidualAttentionBlock {
        attn: MultiHeadAttention {
          n_head: 6
          n_state: 384
          query: Linear {d_input: 384, d_output: 384, bias: true}
          key: Linear {d_input: 384, d_output: 384, bias: false}
          value: Linear {d_input: 384, d_output: 384, bias: true}
          out: Linear {d_input: 384, d_output: 384, bias: true}
        }
        attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
        mlp: Mlp {
          n_state: 384
          n_mlp: 6
          ln1: Linear {d_input: 384, d_output: 6, bias: true}
          gelu: Gelu
          ln2: Linear {d_input: 384, d_output: 6, bias: true}
        }
        mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
      }
      2: ResidualAttentionBlock {
        attn: MultiHeadAttention {
          n_head: 6
          n_state: 384
          query: Linear {d_input: 384, d_output: 384, bias: true}
          key: Linear {d_input: 384, d_output: 384, bias: false}
          value: Linear {d_input: 384, d_output: 384, bias: true}
          out: Linear {d_input: 384, d_output: 384, bias: true}
        }
        attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
        mlp: Mlp {
          n_state: 384
          n_mlp: 6
          ln1: Linear {d_input: 384, d_output: 6, bias: true}
          gelu: Gelu
          ln2: Linear {d_input: 384, d_output: 6, bias: true}
        }
        mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
      }
      3: ResidualAttentionBlock {
        attn: MultiHeadAttention {
          n_head: 6
          n_state: 384
          query: Linear {d_input: 384, d_output: 384, bias: true}
          key: Linear {d_input: 384, d_output: 384, bias: false}
          value: Linear {d_input: 384, d_output: 384, bias: true}
          out: Linear {d_input: 384, d_output: 384, bias: true}
        }
        attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
        mlp: Mlp {
          n_state: 384
          n_mlp: 6
          ln1: Linear {d_input: 384, d_output: 6, bias: true}
          gelu: Gelu
          ln2: Linear {d_input: 384, d_output: 6, bias: true}
        }
        mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
      }
    }
    ln_post: LayerNorm {d_model: 384, epsilon: 0.00001}
    gelu: Gelu
  }
}
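
For readers curious how this kind of nested, indentation-aware output can be rendered, here is a minimal, self-contained Rust sketch. It is not Burn's actual implementation (the real version hooks into the Module derive and the display machinery this PR adds); the Node type, the fmt_indented helper, and the hand-built MultiHeadAttention example below are purely illustrative.

use std::fmt;

// Illustrative only: a toy module tree, not Burn's ModuleDisplay machinery.
enum Node {
    // A leaf module rendered on one line, e.g. `Linear {d_input: 384, ...}`.
    Leaf { name: String, attrs: Vec<(String, String)> },
    // A container module whose named children are printed indented below it.
    Branch { name: String, children: Vec<(String, Node)> },
}

impl Node {
    fn fmt_indented(&self, f: &mut fmt::Formatter<'_>, indent: usize) -> fmt::Result {
        let pad = "  ".repeat(indent);
        match self {
            Node::Leaf { name, attrs } => {
                // Render leaf settings as a single `key: value` list.
                let body: Vec<String> =
                    attrs.iter().map(|(k, v)| format!("{k}: {v}")).collect();
                write!(f, "{name} {{{}}}", body.join(", "))
            }
            Node::Branch { name, children } => {
                // Open the container, print each child one level deeper, then close.
                writeln!(f, "{name} {{")?;
                for (field, child) in children {
                    write!(f, "{pad}  {field}: ")?;
                    child.fmt_indented(f, indent + 1)?;
                    writeln!(f)?;
                }
                write!(f, "{pad}}}")
            }
        }
    }
}

impl fmt::Display for Node {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.fmt_indented(f, 0)
    }
}

fn main() {
    // Rebuild a small fragment of the tree above by hand, just to show the layout.
    let linear = |bias: bool| Node::Leaf {
        name: "Linear".into(),
        attrs: vec![
            ("d_input".into(), "384".into()),
            ("d_output".into(), "384".into()),
            ("bias".into(), bias.to_string()),
        ],
    };
    let attn = Node::Branch {
        name: "MultiHeadAttention".into(),
        children: vec![
            ("query".into(), linear(true)),
            ("key".into(), linear(false)),
            ("value".into(), linear(true)),
            ("out".into(), linear(true)),
        ],
    };
    let encoder = Node::Branch {
        name: "Encoder".into(),
        children: vec![("attn".into(), attn)],
    };
    println!("{encoder}");
}

Running this prints a fragment matching the layout above: one line per leaf module with its settings, and indented braces for container modules.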

antimora · Jun 13 '24 15:06