@@ -36,37 +36,62 @@ Identify if tests were conducted on a particular date or any diagnosis was made
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
...
- words_embedder = WordEmbeddingsModel() \
-     .pretrained("embeddings_clinical", "en", "clinical/models") \
-     .setInputCols(["sentences", "tokens"]) \
+ documenter = DocumentAssembler()\
+     .setInputCol("text")\
+     .setOutputCol("document")
+
+ sentencer = SentenceDetector()\
+     .setInputCols(["document"])\
+     .setOutputCol("sentences")
+
+ tokenizer = sparknlp.annotators.Tokenizer()\
+     .setInputCols(["sentences"])\
+     .setOutputCol("tokens")
+
+ words_embedder = WordEmbeddingsModel()\
+     .pretrained("embeddings_clinical", "en", "clinical/models")\
+     .setInputCols(["sentences", "tokens"])\
    .setOutputCol("embeddings")
- ner_tagger = NerDLModel() \
-     .pretrained("jsl_ner_wip_greedy_clinical", "en", "clinical/models") \
-     .setInputCols(["sentences", "tokens", "embeddings"]) \
-     .setOutputCol("ner_tags")
- ner_converter = NerConverter() \
-     .setInputCols(["sentences", "tokens", "ner_tags"]) \
+
+ pos_tagger = PerceptronModel()\
+     .pretrained("pos_clinical", "en", "clinical/models") \
+     .setInputCols(["sentences", "tokens"])\
+     .setOutputCol("pos_tags")
+
+ events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")\
+     .setInputCols("sentences", "tokens", "embeddings")\
+     .setOutputCol("ner_tags")
+
+ ner_chunker = NerConverterInternal()\
+     .setInputCols(["sentences", "tokens", "ner_tags"])\
    .setOutputCol("ner_chunks")
+
dependency_parser = DependencyParserModel() \
    .pretrained("dependency_conllu", "en") \
    .setInputCols(["sentences", "pos_tags", "tokens"]) \
    .setOutputCol("dependencies")

- # Set a filter on pairs of named entities which will be treated as relation candidates
- re_ner_chunk_filter = RENerChunksFilter() \
+ events_re_ner_chunk_filter = RENerChunksFilter() \
    .setInputCols(["ner_chunks", "dependencies"])\
-     .setMaxSyntacticDistance(10)\
-     .setOutputCol("re_ner_chunks").setRelationPairs(['symptom-date', 'date-procedure', 'delativedate-test', 'test-date'])
+     .setOutputCol("re_ner_chunks")

- # The dataset this model is trained to is sentence-wise.
- # This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
- re_model = RelationExtractionDLModel()\
-     .pretrained('redl_date_clinical_biobert', 'en', "clinical/models") \
+ events_re_Model = RelationExtractionDLModel() \
+     .pretrained('redl_date_clinical_biobert', "en", "clinical/models")\
    .setPredictionThreshold(0.5)\
    .setInputCols(["re_ner_chunks", "sentences"]) \
    .setOutputCol("relations")

- pipeline = Pipeline(stages = [documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model])
+ pipeline = Pipeline(stages = [
+     documenter,
+     sentencer,
+     tokenizer,
+     words_embedder,
+     pos_tagger,
+     events_ner_tagger,
+     ner_chunker,
+     dependency_parser,
+     events_re_ner_chunk_filter,
+     events_re_Model])

text = "This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94."
data = spark.createDataFrame([[text]]).toDF("text")
@@ -75,36 +100,51 @@ result = p_model.transform(data)
```
```scala
...
- val words_embedder = WordEmbeddingsModel()
-     .pretrained("embeddings_clinical", "en", "clinical/models")
-     .setInputCols(Array("sentences", "tokens"))
+ val documenter = new DocumentAssembler()
+     .setInputCol("text")
+     .setOutputCol("document")
+
+ val sentencer = new SentenceDetector()
+     .setInputCols("document")
+     .setOutputCol("sentences")
+
+ val tokenizer = new Tokenizer()
+     .setInputCols("sentences")
+     .setOutputCol("tokens")
+
+ val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
+     .setInputCols(Array("sentences", "tokens"))
    .setOutputCol("embeddings")
- val ner_tagger = NerDLModel()
-     .pretrained("ner_clinical", "en", "clinical/models")
+
+ val pos_tagger = PerceptronModel
+     .pretrained("pos_clinical", "en", "clinical/models")
+     .setInputCols(Array("sentences", "tokens"))
+     .setOutputCol("pos_tags")
+
+ val events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")
    .setInputCols(Array("sentences", "tokens", "embeddings"))
-     .setOutputCol("ner_tags")
- val ner_converter = NerConverter()
+     .setOutputCol("ner_tags")
+
+ val ner_chunker = new NerConverterInternal()
    .setInputCols(Array("sentences", "tokens", "ner_tags"))
    .setOutputCol("ner_chunks")
+
val dependency_parser = DependencyParserModel
    .pretrained("dependency_conllu", "en")
    .setInputCols(Array("sentences", "pos_tags", "tokens"))
    .setOutputCol("dependencies")

- // Set a filter on pairs of named entities which will be treated as relation candidates
- val re_ner_chunk_filter = RENerChunksFilter()
+ val events_re_ner_chunk_filter = new RENerChunksFilter()
    .setInputCols(Array("ner_chunks", "dependencies"))
-     .setMaxSyntacticDistance(10)
-     .setOutputCol("re_ner_chunks").setRelationPairs(Array("symptom-date", "date-procedure", "delativedate-test", "test-date"))
-
- // The dataset this model is trained to is sentence-wise.
- // This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
- val re_model = RelationExtractionDLModel()
-     .pretrained("redl_date_clinical_biobert", "en", "clinical/models")
+     .setOutputCol("re_ner_chunks")
+
+ val events_re_Model = RelationExtractionDLModel
+     .pretrained("redl_date_clinical_biobert", "en", "clinical/models")
    .setPredictionThreshold(0.5f)
-     .setInputCols(Array("re_ner_chunks", "sentences"))
+     .setInputCols(Array("re_ner_chunks", "sentences"))
    .setOutputCol("relations")
- val pipeline = new Pipeline().setStages(Array(documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model))
+
+ val pipeline = new Pipeline().setStages(Array(documenter, sentencer, tokenizer, words_embedder, pos_tagger, events_ner_tagger, ner_chunker, dependency_parser, events_re_ner_chunk_filter, events_re_Model))

val data = Seq("This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.").toDF("text")
val result = pipeline.fit(data).transform(data)
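
// A minimal sketch of how the extracted relations could be inspected,
// assuming the standard Spark NLP relation annotation schema: an array
// column "relations" whose metadata map carries keys such as entity1,
// chunk1, entity2, chunk2 and confidence.
import org.apache.spark.sql.functions.{col, explode}

result.select(explode(col("relations")).as("rel"))
  .select(
    col("rel.result").as("relation"),
    col("rel.metadata")("entity1").as("entity1"),
    col("rel.metadata")("chunk1").as("chunk1"),
    col("rel.metadata")("entity2").as("entity2"),
    col("rel.metadata")("chunk2").as("chunk2"),
    col("rel.metadata")("confidence").as("confidence"))
  .show(false)
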
@@ -143,4 +183,4 @@ Relation Recall Precision F1 Support
0 0.738 0.729 0.734 84
1 0.945 0.947 0.946 416
Avg. 0.841 0.838 0.840
- ```
+ ```