KurtMica committed on
Commit
04a05a3
1 Parent(s): 5893315

Model files.

Browse files
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.th filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - allennlp
4
+ ---
5
+
6
+ # TODO: Fill this model card
best.th ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c59993607c26b7dd8d9da3c7d208b78475deeb7759bf4686472945a80bf9faa
3
+ size 504095439
config.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "multitask",
4
+ "readers": {
5
+ "pos": {
6
+ "type": "mlrs_pos",
7
+ "language_specific": false,
8
+ "token_indexers": {
9
+ "transformer": {
10
+ "type": "pretrained_transformer_mismatched",
11
+ "max_length": 512,
12
+ "model_name": "MLRS/BERTu"
13
+ }
14
+ }
15
+ }
16
+ }
17
+ },
18
+ "model": {
19
+ "type": "multitask",
20
+ "arg_name_mapping": {
21
+ "backbone": {
22
+ "tokens": "text",
23
+ "words": "text"
24
+ }
25
+ },
26
+ "backbone": {
27
+ "type": "embedder_and_mask",
28
+ "text_field_embedder": {
29
+ "token_embedders": {
30
+ "transformer": {
31
+ "type": "pretrained_transformer_mismatched_with_dropout",
32
+ "last_layer_only": false,
33
+ "layer_dropout": 0.1,
34
+ "max_length": 512,
35
+ "model_name": "MLRS/BERTu",
36
+ "tokenizer_kwargs": {},
37
+ "train_parameters": true
38
+ }
39
+ }
40
+ }
41
+ },
42
+ "heads": {
43
+ "pos": {
44
+ "type": "linear_tagger",
45
+ "dropout": 0.3,
46
+ "encoder": {
47
+ "type": "pass_through",
48
+ "input_dim": 768
49
+ },
50
+ "initializer": {
51
+ "regexes": [
52
+ [
53
+ ".*projection.*weight",
54
+ {
55
+ "type": "xavier_uniform"
56
+ }
57
+ ],
58
+ [
59
+ ".*projection.*bias",
60
+ {
61
+ "type": "zero"
62
+ }
63
+ ],
64
+ [
65
+ ".*tag_bilinear.*weight",
66
+ {
67
+ "type": "xavier_uniform"
68
+ }
69
+ ],
70
+ [
71
+ ".*tag_bilinear.*bias",
72
+ {
73
+ "type": "zero"
74
+ }
75
+ ],
76
+ [
77
+ ".*weight_ih.*",
78
+ {
79
+ "type": "xavier_uniform"
80
+ }
81
+ ],
82
+ [
83
+ ".*weight_hh.*",
84
+ {
85
+ "type": "orthogonal"
86
+ }
87
+ ],
88
+ [
89
+ ".*bias_ih.*",
90
+ {
91
+ "type": "zero"
92
+ }
93
+ ],
94
+ [
95
+ ".*bias_hh.*",
96
+ {
97
+ "type": "lstm_hidden_bias"
98
+ }
99
+ ]
100
+ ]
101
+ }
102
+ }
103
+ }
104
+ },
105
+ "train_data_path": {
106
+ "pos": "MLRS POS Gold/mlrs_pos-train.tsv"
107
+ },
108
+ "validation_data_path": {
109
+ "pos": "MLRS POS Gold/mlrs_pos-dev.tsv"
110
+ },
111
+ "trainer": {
112
+ "callbacks": [
113
+ {
114
+ "tensorboard_writer": {
115
+ "should_log_learning_rate": true,
116
+ "should_log_parameter_statistics": true
117
+ },
118
+ "type": "tensorboard"
119
+ }
120
+ ],
121
+ "cuda_device": 0,
122
+ "grad_norm": 5,
123
+ "learning_rate_scheduler": {
124
+ "type": "ulmfit_sqrt",
125
+ "affected_group_count": 2,
126
+ "decay_factor": 0.05,
127
+ "discriminative_fine_tuning": true,
128
+ "factor": 5,
129
+ "gradual_unfreezing": true,
130
+ "model_size": 1,
131
+ "start_step": 39,
132
+ "warmup_steps": 39
133
+ },
134
+ "num_epochs": 200,
135
+ "optimizer": {
136
+ "type": "huggingface_adamw",
137
+ "betas": [
138
+ 0.9,
139
+ 0.999
140
+ ],
141
+ "correct_bias": false,
142
+ "lr": 0.0005,
143
+ "parameter_groups": [
144
+ [
145
+ [
146
+ "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
147
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
148
+ ],
149
+ {}
150
+ ],
151
+ [
152
+ [
153
+ "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
154
+ "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
155
+ "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
156
+ "text_field_embedder.*transformer_model.pooler.dense.bias"
157
+ ],
158
+ {
159
+ "weight_decay": 0
160
+ }
161
+ ],
162
+ [
163
+ [
164
+ "text_field_embedder.*._scalar_mix.*",
165
+ "text_field_embedder.*transformer_model.pooler.dense.weight",
166
+ "_head_sentinel",
167
+ "head_arc_feedforward._linear_layers.*.weight",
168
+ "child_arc_feedforward._linear_layers.*.weight",
169
+ "head_tag_feedforward._linear_layers.*.weight",
170
+ "child_tag_feedforward._linear_layers.*.weight",
171
+ "arc_attention._weight_matrix",
172
+ "tag_bilinear.weight",
173
+ "tag_projection_layer._module.weight",
174
+ "crf",
175
+ "linear.weight",
176
+ "tagger_linear.weight"
177
+ ],
178
+ {}
179
+ ],
180
+ [
181
+ [
182
+ "head_arc_feedforward._linear_layers.*.bias",
183
+ "child_arc_feedforward._linear_layers.*.bias",
184
+ "head_tag_feedforward._linear_layers.*.bias",
185
+ "child_tag_feedforward._linear_layers.*.bias",
186
+ "arc_attention._bias",
187
+ "tag_bilinear.bias",
188
+ "tag_projection_layer._module.bias",
189
+ "linear.bias",
190
+ "tagger_linear.bias"
191
+ ],
192
+ {
193
+ "weight_decay": 0
194
+ }
195
+ ]
196
+ ],
197
+ "weight_decay": 0.01
198
+ },
199
+ "patience": 20,
200
+ "validation_metric": [
201
+ "+pos_accuracy"
202
+ ]
203
+ },
204
+ "data_loader": {
205
+ "type": "multitask",
206
+ "scheduler": {
207
+ "type": "unbalanced_homogeneous_roundrobin",
208
+ "batch_size": 128,
209
+ "dataset_sizes": {
210
+ "pos": 4935
211
+ }
212
+ },
213
+ "shuffle": true
214
+ },
215
+ "numpy_seed": 1279,
216
+ "pytorch_seed": 127,
217
+ "random_seed": 12790,
218
+ "validation_data_loader": {
219
+ "type": "multitask",
220
+ "scheduler": {
221
+ "type": "homogeneous_roundrobin",
222
+ "batch_size": 128
223
+ },
224
+ "shuffle": true
225
+ }
226
+ }
log/train/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc434c6da5af8279be80087cc906c7a771aac737e0430e6a0b12b82ea9bc551
3
+ size 4365860
log/validation/events.out.tfevents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22db75e8cedb36ea926adaee22a4a309640fe8b9efcdc5a8578e393992a9d977
3
+ size 7996
metrics.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_epoch": 32,
3
+ "peak_worker_0_memory_MB": 4411.46484375,
4
+ "peak_gpu_0_memory_MB": 20483.64892578125,
5
+ "training_duration": "0:09:30.396633",
6
+ "training_start_epoch": 0,
7
+ "training_epochs": 51,
8
+ "epoch": 51,
9
+ "training_pos_accuracy": 0.9999886954555731,
10
+ "training_pos_accuracy_words_only": 0.9999870104565824,
11
+ "training_loss": 0.00021654496140399715,
12
+ "training_worker_0_memory_MB": 4411.46484375,
13
+ "training_gpu_0_memory_MB": 20483.64892578125,
14
+ "validation_pos_accuracy": 0.989863375936536,
15
+ "validation_pos_accuracy_words_only": 0.9891052153737516,
16
+ "validation_loss": 0.10308615937829017,
17
+ "best_validation_pos_accuracy": 0.9908329660643456,
18
+ "best_validation_pos_accuracy_words_only": 0.9901139917280339,
19
+ "best_validation_loss": 0.09275933802127838
20
+ }
vocabulary/.lock ADDED
File without changes
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *tags
2
+ *labels
vocabulary/pos.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @@UNKNOWN@@
2
+ NOUN
3
+ VERB
4
+ PUNCT
5
+ ADP
6
+ DET
7
+ PRON
8
+ SCONJ
9
+ ADJ
10
+ PROPN
11
+ CCONJ
12
+ AUX
13
+ ADV
14
+ NUM
15
+ PART
16
+ X
17
+ INTJ
18
+ SYM