# StableFast3D_config.yaml
# Model configuration: each component is declared as a `<name>_cls` class path
# followed by a `<name>` key holding that component's settings.
# Global scalar settings.
# NOTE(review): the meanings below are inferred from the key names only —
# confirm against the code that consumes this file.
# Presumably the side length of the conditioning image; same value as the
# image_tokenizer width/height (512).
cond_image_size: 512
# Presumably the grid resolution used for isosurface extraction.
isosurface_resolution: 160
# Presumably a scene/bounding radius; units are not visible in this file.
radius: 0.87
# Camera embedder: dotted class path plus the settings for that component.
# The settings are nested under `camera_embedder` so that `in_channels` and
# `out_channels` belong to this component instead of colliding with the
# same-named keys of other components at the top level.
camera_embedder_cls: StableFast3D.sf3d.models.camera.LinearCameraEmbedder
camera_embedder:
  # NOTE(review): 25 = 16 + 9 would fit a flattened 4x4 camera-to-world matrix
  # plus a 3x3 intrinsic matrix (see `conditions`) — confirm.
  in_channels: 25
  out_channels: 768
  # Names of the conditioning inputs (presumably batch keys read by the
  # embedder — confirm).
  conditions:
    - c2w_cond
    - intrinsic_normed_cond
# Image tokenizer: dotted class path plus the settings for that component.
# All four settings are nested under `image_tokenizer`; left at the top level
# the section itself would parse as null.
image_tokenizer_cls: StableFast3D.sf3d.models.tokenizers.image.DINOV2SingleImageTokenizer
image_tokenizer:
  # Pretrained model identifier (looks like a Hugging Face hub id — confirm).
  pretrained_model_name_or_path: "facebook/dinov2-large"
  width: 512
  height: 512
  # Same value as camera_embedder out_channels (768); presumably the two must
  # agree — confirm.
  modulation_cond_dim: 768
# Triplane tokenizer: dotted class path plus the settings for that component.
# Settings are nested under `tokenizer` so they are not read as top-level keys.
tokenizer_cls: StableFast3D.sf3d.models.tokenizers.triplane.TriplaneLearnablePositionalEmbedding
tokenizer:
  plane_size: 96
  # Same value as backbone raw_triplane_channels (1024); presumably the two
  # must agree — confirm.
  num_channels: 1024
# Transformer backbone: dotted class path plus the settings for that component.
# Settings are nested under `backbone` so they are not read as top-level keys.
backbone_cls: StableFast3D.sf3d.models.transformers.backbone.TwoStreamInterleaveTransformer
backbone:
  # 16 heads x 64 dims per head = 1024, the same value as the channel settings
  # below (presumably not a coincidence — confirm).
  num_attention_heads: 16
  attention_head_dim: 64
  raw_triplane_channels: 1024
  triplane_channels: 1024
  raw_image_channels: 1024  # DINO features
  num_latents: 1792
  num_blocks: 4
  num_basic_blocks: 3
# Post-processor: dotted class path plus the settings for that component.
# Settings are nested under `post_processor`; at the top level `in_channels`
# and `out_channels` would duplicate the same keys of other components.
post_processor_cls: StableFast3D.sf3d.models.network.PixelShuffleUpsampleNetwork
post_processor:
  # Same value as backbone triplane_channels (1024); presumably the two must
  # agree — confirm.
  in_channels: 1024
  out_channels: 40
  scale_factor: 4
  conv_layers: 4
# Decoder: dotted class path plus the settings for that component.
# `heads` is a list of mappings, one per output head. Each head's keys are
# indented under its `- name:` entry; without that indentation the list items
# cannot be parsed.
decoder_cls: StableFast3D.sf3d.models.network.MaterialMLP
decoder:
  # NOTE(review): 120 equals 3 x post_processor out_channels (40), which would
  # fit three concatenated triplane features — confirm.
  in_channels: 120
  n_neurons: 64
  activation: silu
  heads:
    - name: density
      out_channels: 1
      # NOTE(review): decoder heads spell this key `out_bias`, while the
      # estimator heads elsewhere in this file use `output_bias` — verify each
      # consumer expects its own spelling.
      out_bias: -1.0
      n_hidden_layers: 2
      output_activation: trunc_exp
    - name: features
      out_channels: 3
      n_hidden_layers: 3
      output_activation: sigmoid
    - name: perturb_normal
      out_channels: 3
      n_hidden_layers: 3
      output_activation: normalize_channel_last
    # No `output_activation` is given for this head; whatever default the
    # consumer defines applies (not visible in this file).
    - name: vertex_offset
      out_channels: 3
      n_hidden_layers: 2
# Image estimator: dotted class path plus the settings for that component.
# `heads` is a list of mappings, one per estimated quantity. Each head's keys
# are indented under its `- name:` entry so the two heads stay separate.
image_estimator_cls: StableFast3D.sf3d.models.image_estimator.clip_based_estimator.ClipBasedHeadEstimator
image_estimator:
  distribution: beta
  distribution_eval: mode
  heads:
    - name: roughness
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      # Presumably a reshape target for the head output, with -1 as the
      # inferred (batch) dimension — confirm.
      shape: [-1, 1, 1]
    - name: metallic
      out_channels: 1
      n_hidden_layers: 3
      output_activation: linear
      add_to_decoder_features: true
      output_bias: 1.0
      shape: [-1, 1, 1]
# Global estimator: dotted class path plus the settings for that component.
# `heads` is a list of mappings; the single head's keys are indented under its
# `- name:` entry.
global_estimator_cls: StableFast3D.sf3d.models.global_estimator.multi_head_estimator.MultiHeadEstimator
global_estimator:
  # Same value as backbone triplane_channels (1024); presumably the two must
  # agree — confirm.
  triplane_features: 1024
  heads:
    - name: sg_amplitudes
      out_channels: 24
      n_hidden_layers: 3
      output_activation: softplus
      output_bias: 1.0
      # Presumably a reshape target for the head output, with -1 as the
      # inferred (batch) dimension — confirm.
      shape: [-1, 24, 1]