File size: 4,869 Bytes
67b337b
{
    "dataset_reader": {
        "type": "drop",
        "instance_format": "drop",
        "passage_length_limit": 400,
        "question_length_limit": 50,
        "skip_when_all_empty": [
            "passage_span",
            "question_span",
            "addition_subtraction",
            "counting"
        ],
        "token_indexers": {
            "token_characters": {
                "type": "characters",
                "min_padding_length": 5
            },
            "tokens": {
                "type": "single_id",
                "lowercase_tokens": true
            }
        }
    },
    "model": {
        "type": "naqanet",
        "answering_abilities": [
            "passage_span_extraction",
            "question_span_extraction",
            "addition_subtraction",
            "counting"
        ],
        "dropout_prob": 0.1,
        "matrix_attention_layer": {
            "type": "linear",
            "combination": "x,y,x*y",
            "tensor_1_dim": 128,
            "tensor_2_dim": 128
        },
        "modeling_layer": {
            "type": "qanet_encoder",
            "attention_dropout_prob": 0,
            "attention_projection_dim": 128,
            "conv_kernel_size": 5,
            "dropout_prob": 0.1,
            "feedforward_hidden_dim": 128,
            "hidden_dim": 128,
            "input_dim": 128,
            "layer_dropout_undecayed_prob": 0.1,
            "num_attention_heads": 8,
            "num_blocks": 6,
            "num_convs_per_block": 2
        },
        "num_highway_layers": 2,
        "phrase_layer": {
            "type": "qanet_encoder",
            "attention_dropout_prob": 0,
            "attention_projection_dim": 128,
            "conv_kernel_size": 7,
            "dropout_prob": 0.1,
            "feedforward_hidden_dim": 128,
            "hidden_dim": 128,
            "input_dim": 128,
            "layer_dropout_undecayed_prob": 0.1,
            "num_attention_heads": 8,
            "num_blocks": 1,
            "num_convs_per_block": 4
        },
        "regularizer": {
            "regexes": [
                [
                    ".*",
                    {
                        "alpha": 1e-07,
                        "type": "l2"
                    }
                ]
            ]
        },
        "text_field_embedder": {
            "token_embedders": {
                "token_characters": {
                    "type": "character_encoding",
                    "embedding": {
                        "embedding_dim": 64
                    },
                    "encoder": {
                        "type": "cnn",
                        "embedding_dim": 64,
                        "ngram_filter_sizes": [
                            5
                        ],
                        "num_filters": 200
                    }
                },
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 300,
                    "pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip",
                    "trainable": false
                }
            }
        }
    },
    "train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_train.json",
    "validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_dev.json",
    "trainer": {
        "callbacks": [
            "tensorboard"
        ],
        "grad_norm": 5,
        "moving_average": {
            "type": "exponential",
            "decay": 0.9999
        },
        "num_epochs": 50,
        "optimizer": {
            "type": "adam",
            "betas": [
                0.8,
                0.999
            ],
            "eps": 1e-07,
            "lr": 0.0005
        },
        "patience": 10,
        "validation_metric": "+f1"
    },
    "vocabulary": {
        "min_count": {
            "token_characters": 200
        },
        "only_include_pretrained_words": true,
        "pretrained_files": {
            "tokens": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip"
        }
    },
    "data_loader": {
        "batch_sampler": {
            "type": "bucket",
            "batch_size": 16
        }
    },
    "validation_dataset_reader": {
        "type": "drop",
        "instance_format": "drop",
        "passage_length_limit": 1000,
        "question_length_limit": 100,
        "skip_when_all_empty": [],
        "token_indexers": {
            "token_characters": {
                "type": "characters",
                "min_padding_length": 5
            },
            "tokens": {
                "type": "single_id",
                "lowercase_tokens": true
            }
        }
    }
}