File size: 2,540 Bytes
8745e78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{
  "dataset_reader": {
    "type": "snli",
    "token_indexers": {
      "elmo": {
        "type": "elmo_characters"
      }
    },
    "tokenizer": {
      "end_tokens": ["@@NULL@@"]
    }
  },
  "train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_train.jsonl",
  "validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_dev.jsonl",
  "model": {
    "type": "decomposable_attention",
    "text_field_embedder": {
      "token_embedders": {
        "elmo": {
          "type": "elmo_token_embedder",
          "do_layer_norm": false,
          "dropout": 0.2
        }
      }
    },
    "attend_feedforward": {
      "input_dim": 1024,
      "num_layers": 2,
      "hidden_dims": 200,
      "activations": "relu",
      "dropout": 0.2
    },
    "matrix_attention": {
      "type": "dot_product"
    },
    "compare_feedforward": {
      "input_dim": 2048,
      "num_layers": 2,
      "hidden_dims": 200,
      "activations": "relu",
      "dropout": 0.2
    },
    "aggregate_feedforward": {
      "input_dim": 400,
      "num_layers": 2,
      "hidden_dims": [200, 3],
      "activations": ["relu", "linear"],
      "dropout": [0.2, 0.0]
    },
    "initializer": {
      "regexes": [
        [
          ".*linear_layers.*weight",
          {
            "type": "xavier_normal"
          }
        ],
        [
          ".*token_embedder_tokens\\._projection.*weight",
          {
            "type": "xavier_normal"
          }
        ]
      ]
    }
  },
  "iterator": {
    "type": "bucket",
    "sorting_keys": [
      ["premise", "num_tokens"],
      ["hypothesis", "num_tokens"]
    ],
    "batch_size": 64
  },
  "trainer": {
    "num_epochs": 140,
    "patience": 20,
    "cuda_device": 0,
    "grad_clipping": 5.0,
    "validation_metric": "+accuracy",
    "optimizer": {
      "type": "adagrad"
    }
  }
}