Skip to content

Commit 9da2edd

Browse files
gadde5300vkocaman
authored andcommitted
Update 2021-06-01-redl_date_clinical_biobert_en.md
1 parent 69aae0a commit 9da2edd

File tree

1 file changed

+77
-37
lines changed

1 file changed

+77
-37
lines changed

docs/_posts/muhammetsnts/2021-06-01-redl_date_clinical_biobert_en.md

Lines changed: 77 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -36,37 +36,62 @@ Identify if tests were conducted on a particular date or any diagnosis was made
3636
{% include programmingLanguageSelectScalaPythonNLU.html %}
3737
```python
3838
...
39-
words_embedder = WordEmbeddingsModel() \
40-
.pretrained("embeddings_clinical", "en", "clinical/models") \
41-
.setInputCols(["sentences", "tokens"]) \
39+
documenter = DocumentAssembler()\
40+
.setInputCol("text")\
41+
.setOutputCol("document")
42+
43+
sentencer = SentenceDetector()\
44+
.setInputCols(["document"])\
45+
.setOutputCol("sentences")
46+
47+
tokenizer = sparknlp.annotators.Tokenizer()\
48+
.setInputCols(["sentences"])\
49+
.setOutputCol("tokens")
50+
51+
words_embedder = WordEmbeddingsModel()\
52+
.pretrained("embeddings_clinical", "en", "clinical/models")\
53+
.setInputCols(["sentences", "tokens"])\
4254
.setOutputCol("embeddings")
43-
ner_tagger = NerDLModel() \
44-
.pretrained("jsl_ner_wip_greedy_clinical", "en", "clinical/models") \
45-
.setInputCols(["sentences", "tokens", "embeddings"]) \
46-
.setOutputCol("ner_tags")
47-
ner_converter = NerConverter() \
48-
.setInputCols(["sentences", "tokens", "ner_tags"]) \
55+
56+
pos_tagger = PerceptronModel()\
57+
.pretrained("pos_clinical", "en", "clinical/models") \
58+
.setInputCols(["sentences", "tokens"])\
59+
.setOutputCol("pos_tags")
60+
61+
events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")\
62+
.setInputCols(["sentences", "tokens", "embeddings"])\
63+
.setOutputCol("ner_tags")
64+
65+
ner_chunker = NerConverterInternal()\
66+
.setInputCols(["sentences", "tokens", "ner_tags"])\
4967
.setOutputCol("ner_chunks")
68+
5069
dependency_parser = DependencyParserModel() \
5170
.pretrained("dependency_conllu", "en") \
5271
.setInputCols(["sentences", "pos_tags", "tokens"]) \
5372
.setOutputCol("dependencies")
5473

55-
# Set a filter on pairs of named entities which will be treated as relation candidates
56-
re_ner_chunk_filter = RENerChunksFilter() \
74+
events_re_ner_chunk_filter = RENerChunksFilter() \
5775
.setInputCols(["ner_chunks", "dependencies"])\
58-
.setMaxSyntacticDistance(10)\
59-
.setOutputCol("re_ner_chunks").setRelationPairs(['symptom-date', 'date-procedure', 'delativedate-test', 'test-date'])
76+
.setOutputCol("re_ner_chunks")
6077

61-
# The dataset this model is trained to is sentence-wise.
62-
# This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
63-
re_model = RelationExtractionDLModel()\
64-
.pretrained('redl_date_clinical_biobert', 'en', "clinical/models") \
78+
events_re_Model = RelationExtractionDLModel() \
79+
.pretrained('redl_date_clinical_biobert', "en", "clinical/models")\
6580
.setPredictionThreshold(0.5)\
6681
.setInputCols(["re_ner_chunks", "sentences"]) \
6782
.setOutputCol("relations")
6883

69-
pipeline = Pipeline(stages=[documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model])
84+
pipeline = Pipeline(stages=[
85+
documenter,
86+
sentencer,
87+
tokenizer,
88+
words_embedder,
89+
pos_tagger,
90+
events_ner_tagger,
91+
ner_chunker,
92+
dependency_parser,
93+
events_re_ner_chunk_filter,
94+
events_re_Model])
7095

7196
text ="This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94."
7297
data = spark.createDataFrame([[text]]).toDF("text")
@@ -75,36 +100,51 @@ result = p_model.transform(data)
75100
```
76101
```scala
77102
...
78-
val words_embedder = WordEmbeddingsModel()
79-
.pretrained("embeddings_clinical", "en", "clinical/models")
80-
.setInputCols(Array("sentences", "tokens"))
103+
val documenter = new DocumentAssembler()
104+
.setInputCol("text")
105+
.setOutputCol("document")
106+
107+
val sentencer = new SentenceDetector()
108+
.setInputCols("document")
109+
.setOutputCol("sentences")
110+
111+
val tokenizer = new Tokenizer()
112+
.setInputCols("sentences")
113+
.setOutputCol("tokens")
114+
115+
val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
116+
.setInputCols(Array("sentences", "tokens"))
81117
.setOutputCol("embeddings")
82-
val ner_tagger = NerDLModel()
83-
.pretrained("ner_clinical", "en", "clinical/models")
118+
119+
val pos_tagger = PerceptronModel()
120+
.pretrained("pos_clinical", "en", "clinical/models")
121+
.setInputCols(Array("sentences", "tokens"))
122+
.setOutputCol("pos_tags")
123+
124+
val events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")
84125
.setInputCols(Array("sentences", "tokens", "embeddings"))
85-
.setOutputCol("ner_tags")
86-
val ner_converter = NerConverter()
126+
.setOutputCol("ner_tags")
127+
128+
val ner_chunker = new NerConverterInternal()
87129
.setInputCols(Array("sentences", "tokens", "ner_tags"))
88130
.setOutputCol("ner_chunks")
131+
89132
val dependency_parser = DependencyParserModel()
90133
.pretrained("dependency_conllu", "en")
91134
.setInputCols(Array("sentences", "pos_tags", "tokens"))
92135
.setOutputCol("dependencies")
93136

94-
// Set a filter on pairs of named entities which will be treated as relation candidates
95-
val re_ner_chunk_filter = RENerChunksFilter()
137+
val events_re_ner_chunk_filter = new RENerChunksFilter()
96138
.setInputCols(Array("ner_chunks", "dependencies"))
97-
.setMaxSyntacticDistance(10)
98-
.setOutputCol("re_ner_chunks").setRelationPairs(Array('symptom-date', 'date-procedure', 'delativedate-test', 'test-date'))
99-
100-
// The dataset this model is trained to is sentence-wise.
101-
// This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
102-
val re_model = RelationExtractionDLModel()
103-
.pretrained("redl_date_clinical_biobert", "en", "clinical/models")
139+
.setOutputCol("re_ner_chunks")
140+
141+
val events_re_Model = RelationExtractionDLModel()
142+
.pretrained("redl_date_clinical_biobert", "en", "clinical/models")
104143
.setPredictionThreshold(0.5)
105-
.setInputCols(Array("re_ner_chunks", "sentences"))
144+
.setInputCols(Array("re_ner_chunks", "sentences"))
106145
.setOutputCol("relations")
107-
val pipeline = new Pipeline().setStages(Array(documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model))
146+
147+
val pipeline = new Pipeline().setStages(Array(documenter,sentencer,tokenizer,words_embedder,pos_tagger,events_ner_tagger,ner_chunker,dependency_parser,events_re_ner_chunk_filter,events_re_Model))
108148

109149
val data = Seq("This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.").toDF("text")
110150
val result = pipeline.fit(data).transform(data)
@@ -143,4 +183,4 @@ Relation Recall Precision F1 Support
143183
0 0.738 0.729 0.734 84
144184
1 0.945 0.947 0.946 416
145185
Avg. 0.841 0.838 0.840
146-
```
186+
```

0 commit comments

Comments
 (0)