WIP: Display of module tree
Pull Request Template
Checklist
- [ ] Confirmed that `run-checks all` script has been executed.
- [ ] Made sure the book is up to date with changes in this PR.
Related Issues/PRs
fixes #1357
Changes
Work in progress
Testing
Work in progress
I'll be on a two-week break and will finish this off after May 28th. Any feedback is welcome.
Work-in-progress output for the Whisper model:

    Whisper {
      encoder: Encoder {
        conv1: Conv1d {stride: 1, kernel_size: 3, dilation: 1, groups: 1, padding: Explicit(1)}
        conv2: Conv1d {stride: 2, kernel_size: 3, dilation: 1, groups: 1, padding: Explicit(1)}
        positional_embedding: Param
        blocks: Vec<0..4> {
          0: ResidualAttentionBlock {
            attn: MultiHeadAttention {
              n_head: 6
              n_state: 384
              query: Linear {d_input: 384, d_output: 384, bias: true}
              key: Linear {d_input: 384, d_output: 384, bias: false}
              value: Linear {d_input: 384, d_output: 384, bias: true}
              out: Linear {d_input: 384, d_output: 384, bias: true}
            }
            attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
            mlp: Mlp {
              n_state: 384
              n_mlp: 6
              ln1: Linear {d_input: 384, d_output: 6, bias: true}
              gelu: Gelu
              ln2: Linear {d_input: 384, d_output: 6, bias: true}
            }
            mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
          }
          1: ResidualAttentionBlock {
            attn: MultiHeadAttention {
              n_head: 6
              n_state: 384
              query: Linear {d_input: 384, d_output: 384, bias: true}
              key: Linear {d_input: 384, d_output: 384, bias: false}
              value: Linear {d_input: 384, d_output: 384, bias: true}
              out: Linear {d_input: 384, d_output: 384, bias: true}
            }
            attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
            mlp: Mlp {
              n_state: 384
              n_mlp: 6
              ln1: Linear {d_input: 384, d_output: 6, bias: true}
              gelu: Gelu
              ln2: Linear {d_input: 384, d_output: 6, bias: true}
            }
            mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
          }
          2: ResidualAttentionBlock {
            attn: MultiHeadAttention {
              n_head: 6
              n_state: 384
              query: Linear {d_input: 384, d_output: 384, bias: true}
              key: Linear {d_input: 384, d_output: 384, bias: false}
              value: Linear {d_input: 384, d_output: 384, bias: true}
              out: Linear {d_input: 384, d_output: 384, bias: true}
            }
            attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
            mlp: Mlp {
              n_state: 384
              n_mlp: 6
              ln1: Linear {d_input: 384, d_output: 6, bias: true}
              gelu: Gelu
              ln2: Linear {d_input: 384, d_output: 6, bias: true}
            }
            mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
          }
          3: ResidualAttentionBlock {
            attn: MultiHeadAttention {
              n_head: 6
              n_state: 384
              query: Linear {d_input: 384, d_output: 384, bias: true}
              key: Linear {d_input: 384, d_output: 384, bias: false}
              value: Linear {d_input: 384, d_output: 384, bias: true}
              out: Linear {d_input: 384, d_output: 384, bias: true}
            }
            attn_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
            mlp: Mlp {
              n_state: 384
              n_mlp: 6
              ln1: Linear {d_input: 384, d_output: 6, bias: true}
              gelu: Gelu
              ln2: Linear {d_input: 384, d_output: 6, bias: true}
            }
            mlp_ln: LayerNorm {d_model: 384, epsilon: 0.00001}
          }
        }
        ln_post: LayerNorm {d_model: 384, epsilon: 0.00001}
        gelu: Gelu
      }
    }
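For readers curious how output shaped like this can be produced: the sketch below is not the implementation in this PR, just a minimal, self-contained Rust illustration of the general technique, a recursive `Display`-style formatter that adds two spaces of indentation per nesting level. All names here (`ModuleNode`, `write_tree`, the toy tree in `main`) are hypothetical and unrelated to burn's actual module API.

```rust
use std::fmt::{self, Write};

/// Hypothetical node type: a leaf carries a one-line summary
/// (e.g. "Linear {d_input: 384, d_output: 384, bias: true}"),
/// while a container carries named children.
enum ModuleNode {
    Leaf(String),
    Container {
        name: String,
        children: Vec<(String, ModuleNode)>,
    },
}

impl ModuleNode {
    /// Recursively write the tree, indenting two spaces per nesting level.
    fn write_tree(&self, f: &mut impl Write, level: usize) -> fmt::Result {
        match self {
            ModuleNode::Leaf(summary) => writeln!(f, "{summary}"),
            ModuleNode::Container { name, children } => {
                let indent = "  ".repeat(level);
                writeln!(f, "{name} {{")?;
                for (field, child) in children {
                    write!(f, "{indent}  {field}: ")?;
                    child.write_tree(f, level + 1)?;
                }
                writeln!(f, "{indent}}}")
            }
        }
    }
}

impl fmt::Display for ModuleNode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Formatter` implements `fmt::Write`, so we can recurse directly into it.
        self.write_tree(f, 0)
    }
}

fn main() {
    // Toy tree mirroring the shape of the Whisper output above.
    let attn = ModuleNode::Container {
        name: "MultiHeadAttention".into(),
        children: vec![
            ("n_head".into(), ModuleNode::Leaf("6".into())),
            (
                "query".into(),
                ModuleNode::Leaf("Linear {d_input: 384, d_output: 384, bias: true}".into()),
            ),
        ],
    };
    let block = ModuleNode::Container {
        name: "ResidualAttentionBlock".into(),
        children: vec![("attn".into(), attn)],
    };
    println!("{block}");
}
```

Running this prints a small `ResidualAttentionBlock { attn: MultiHeadAttention { ... } }` tree in the same layout as the output above; the two-space-per-level convention is only an assumption based on how that output reads.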