dortucx committed on
Commit 099bd9f
1 Parent(s): 3f55bda

Upload 7 files

README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ language: en
+ tags:
+ - allennlp
+ - question-answering
+ ---
+
+ This is an implementation of the BiDAF model with ELMo embeddings. The basic layout is pretty simple: encode words as a combination of word embeddings and a character-level encoding, pass the word representations through a bi-LSTM/GRU, use a matrix of attentions to put question information into the passage word representations (this is the only part that is at all non-standard), pass this through another few layers of bi-LSTMs/GRUs, and do a softmax over span start and span end.
+
+ CAVEATS:
+ ------
+ This model is based on ELMo. ELMo is not deterministic, meaning that you will see slight differences every time you run it. ELMo also benefits from a warm-up, so we recommend processing dummy input before processing real workloads with it (see the usage sketch below).
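For convenience, here is a minimal usage sketch of the model card above. It assumes the repo's files are packaged into a standard AllenNLP model archive and that `allennlp` plus `allennlp-models` are installed; the archive path, warm-up loop, and example texts are illustrative, not part of this repo.

```python
# Minimal usage sketch -- assumes the repo's files are bundled into an
# AllenNLP model archive ("model.tar.gz" here is a hypothetical path).
from allennlp.predictors.predictor import Predictor
import allennlp_models.rc  # noqa: F401  (registers the reading-comprehension predictor)

predictor = Predictor.from_path("model.tar.gz")

# Per the CAVEATS: ELMo is non-deterministic and benefits from a warm-up,
# so run a few dummy inputs before real workloads.
for _ in range(3):
    predictor.predict(passage="This is a warm-up passage.", question="What is this?")

result = predictor.predict(
    passage="The Matrix was released in 1999.",
    question="When was The Matrix released?",
)
print(result["best_span_str"])  # BiDAF's predicted answer span
```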
config.json ADDED
@@ -0,0 +1,123 @@
+ {
+   "dataset_reader": {
+     "type": "squad",
+     "token_indexers": {
+       "elmo": {
+         "type": "elmo_characters"
+       },
+       "token_characters": {
+         "type": "characters",
+         "character_tokenizer": {
+           "byte_encoding": "utf-8",
+           "end_tokens": [
+             260
+           ],
+           "start_tokens": [
+             259
+           ]
+         },
+         "min_padding_length": 5
+       },
+       "tokens": {
+         "type": "single_id",
+         "lowercase_tokens": true
+       }
+     }
+   },
+   "model": {
+     "type": "bidaf",
+     "dropout": 0.2,
+     "matrix_attention": {
+       "type": "linear",
+       "combination": "x,y,x*y",
+       "tensor_1_dim": 200,
+       "tensor_2_dim": 200
+     },
+     "modeling_layer": {
+       "type": "lstm",
+       "bidirectional": true,
+       "dropout": 0.2,
+       "hidden_size": 100,
+       "input_size": 800,
+       "num_layers": 2
+     },
+     "num_highway_layers": 2,
+     "phrase_layer": {
+       "type": "lstm",
+       "bidirectional": true,
+       "hidden_size": 100,
+       "input_size": 1224,
+       "num_layers": 1
+     },
+     "span_end_encoder": {
+       "type": "lstm",
+       "bidirectional": true,
+       "hidden_size": 100,
+       "input_size": 1400,
+       "num_layers": 1
+     },
+     "text_field_embedder": {
+       "token_embedders": {
+         "elmo": {
+           "type": "elmo_token_embedder",
+           "do_layer_norm": false,
+           "dropout": 0,
+           "options_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json",
+           "weight_file": "https://allennlp.s3.amazonaws.com/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
+         },
+         "token_characters": {
+           "type": "character_encoding",
+           "dropout": 0.2,
+           "embedding": {
+             "embedding_dim": 16,
+             "num_embeddings": 262
+           },
+           "encoder": {
+             "type": "cnn",
+             "embedding_dim": 16,
+             "ngram_filter_sizes": [
+               5
+             ],
+             "num_filters": 100
+           }
+         },
+         "tokens": {
+           "type": "embedding",
+           "embedding_dim": 100,
+           "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.6B.100d.txt.gz",
+           "trainable": false
+         }
+       }
+     }
+   },
+   "train_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-train-v1.1.json",
+   "validation_data_path": "https://allennlp.s3.amazonaws.com/datasets/squad/squad-dev-v1.1.json",
+   "trainer": {
+     "callbacks": [
+       "tensorboard"
+     ],
+     "grad_norm": 5,
+     "learning_rate_scheduler": {
+       "type": "reduce_on_plateau",
+       "factor": 0.5,
+       "mode": "max",
+       "patience": 2
+     },
+     "num_epochs": 20,
+     "optimizer": {
+       "type": "adam",
+       "betas": [
+         0.9,
+         0.9
+       ]
+     },
+     "patience": 10,
+     "validation_metric": "+em"
+   },
+   "data_loader": {
+     "batch_sampler": {
+       "type": "bucket",
+       "batch_size": 40
+     }
+   }
+ }
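As a hedged sketch of how this config would be consumed, AllenNLP can train it end-to-end from Python (the CLI equivalent is `allennlp train config.json -s <output_dir>`); the serialization directory below is illustrative:

```python
# Hedged training sketch -- assumes this config is saved locally as
# config.json and that the ELMo/GloVe/SQuAD URLs it references are reachable.
from allennlp.commands.train import train_model_from_file

train_model_from_file(
    parameter_filename="config.json",
    serialization_dir="output/bidaf-elmo",  # hypothetical output directory
)
```

Note how the trainer block drives early stopping: it maximizes `+em` (validation exact match), halves the learning rate after 2 stagnant epochs (`reduce_on_plateau`), and stops after 10 epochs without improvement.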
metrics.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "best_epoch": 4,
+   "peak_worker_0_memory_MB": 39696.53125,
+   "peak_gpu_0_memory_MB": 7284.326171875,
+   "training_duration": "4:30:57.670185",
+   "epoch": 14,
+   "training_start_acc": 0.9643260767816985,
+   "training_end_acc": 0.9655932145344125,
+   "training_span_acc": 0.9391088939371454,
+   "training_em": 0.9497482847977716,
+   "training_f1": 0.9744178909985174,
+   "training_loss": 0.20648720288929873,
+   "training_worker_0_memory_MB": 39696.53125,
+   "training_gpu_0_memory_MB": 7283.4765625,
+   "validation_start_acc": 0.6269631031220435,
+   "validation_end_acc": 0.6750236518448439,
+   "validation_span_acc": 0.535666982024598,
+   "validation_em": 0.6783349101229896,
+   "validation_f1": 0.7866344427692674,
+   "validation_loss": 4.767929854482975,
+   "best_validation_start_acc": 0.6688741721854304,
+   "best_validation_end_acc": 0.7033112582781457,
+   "best_validation_span_acc": 0.5831598864711447,
+   "best_validation_em": 0.7221381267738883,
+   "best_validation_f1": 0.8105977611457257,
+   "best_validation_loss": 2.2097141022952096
+ }
model.th ADDED
Binary file (41 Bytes).
 
vocabulary/non_padded_namespaces.txt ADDED
@@ -0,0 +1,2 @@
+ *labels
+ *tags
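These wildcard patterns are AllenNLP's default non-padded namespaces: any vocabulary namespace matching `*labels` or `*tags` is indexed from 0 with no padding or unknown-token entries. A minimal sketch, assuming stock `Vocabulary` behavior:

```python
# Sketch of what non_padded_namespaces.txt controls: namespaces matching
# these patterns get no @@PADDING@@ / @@UNKNOWN@@ entries in the vocabulary.
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary(non_padded_namespaces=["*labels", "*tags"])
```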
vocabulary/tokens.txt ADDED
The diff for this file is too large to render.
 
weights.th ADDED
Binary file (41 Bytes).