Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 77 additions & 37 deletions docs/_posts/muhammetsnts/2021-06-01-redl_date_clinical_biobert_en.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,37 +36,62 @@ Identify if tests were conducted on a particular date or any diagnosis was made
{% include programmingLanguageSelectScalaPythonNLU.html %}
```python
...
words_embedder = WordEmbeddingsModel() \
.pretrained("embeddings_clinical", "en", "clinical/models") \
.setInputCols(["sentences", "tokens"]) \
documenter = DocumentAssembler()\
.setInputCol("text")\
.setOutputCol("document")

sentencer = SentenceDetector()\
.setInputCols(["document"])\
.setOutputCol("sentences")

tokenizer = sparknlp.annotators.Tokenizer()\
.setInputCols(["sentences"])\
.setOutputCol("tokens")

words_embedder = WordEmbeddingsModel()\
.pretrained("embeddings_clinical", "en", "clinical/models")\
.setInputCols(["sentences", "tokens"])\
.setOutputCol("embeddings")
ner_tagger = NerDLModel() \
.pretrained("jsl_ner_wip_greedy_clinical", "en", "clinical/models") \
.setInputCols(["sentences", "tokens", "embeddings"]) \
.setOutputCol("ner_tags")
ner_converter = NerConverter() \
.setInputCols(["sentences", "tokens", "ner_tags"]) \

pos_tagger = PerceptronModel()\
.pretrained("pos_clinical", "en", "clinical/models") \
.setInputCols(["sentences", "tokens"])\
.setOutputCol("pos_tags")

events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")\
.setInputCols("sentences", "tokens", "embeddings")\
.setOutputCol("ner_tags")

ner_chunker = NerConverterInternal()\
.setInputCols(["sentences", "tokens", "ner_tags"])\
.setOutputCol("ner_chunks")

dependency_parser = DependencyParserModel() \
.pretrained("dependency_conllu", "en") \
.setInputCols(["sentences", "pos_tags", "tokens"]) \
.setOutputCol("dependencies")

# Set a filter on pairs of named entities which will be treated as relation candidates
re_ner_chunk_filter = RENerChunksFilter() \
events_re_ner_chunk_filter = RENerChunksFilter() \
.setInputCols(["ner_chunks", "dependencies"])\
.setMaxSyntacticDistance(10)\
.setOutputCol("re_ner_chunks").setRelationPairs(['symptom-date', 'date-procedure', 'delativedate-test', 'test-date'])
.setOutputCol("re_ner_chunks")

# The dataset this model is trained to is sentence-wise.
# This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
re_model = RelationExtractionDLModel()\
.pretrained('redl_date_clinical_biobert', 'en', "clinical/models") \
events_re_Model = RelationExtractionDLModel() \
.pretrained('redl_date_clinical_biobert', "en", "clinical/models")\
.setPredictionThreshold(0.5)\
.setInputCols(["re_ner_chunks", "sentences"]) \
.setOutputCol("relations")

pipeline = Pipeline(stages=[documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model])
pipeline = Pipeline(stages=[
documenter,
sentencer,
tokenizer,
words_embedder,
pos_tagger,
events_ner_tagger,
ner_chunker,
dependency_parser,
events_re_ner_chunk_filter,
events_re_Model])

text ="This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94."
data = spark.createDataFrame([[text]]).toDF("text")
Expand All @@ -75,36 +100,51 @@ result = p_model.transform(data)
```
```scala
...
val words_embedder = WordEmbeddingsModel()
.pretrained("embeddings_clinical", "en", "clinical/models")
.setInputCols(Array("sentences", "tokens"))
val documenter = DocumentAssembler()
.setInputCol("text")
.setOutputCol("document")

val sentencer = SentenceDetector()
.setInputCols("document")
.setOutputCol("sentences")

val tokenizer = sparknlp.annotators.Tokenizer()
.setInputCols("sentences")
.setOutputCol("tokens")

val words_embedder = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
.setInputCols(Array("sentence", "token"))
.setOutputCol("embeddings")
val ner_tagger = NerDLModel()
.pretrained("ner_clinical", "en", "clinical/models")

val pos_tagger = PerceptronModel()
.pretrained("pos_clinical", "en", "clinical/models")
.setInputCols(Array("sentences", "tokens"))
.setOutputCol("pos_tags")

val events_ner_tagger = MedicalNerModel.pretrained("ner_events_clinical", "en", "clinical/models")
.setInputCols(Array("sentences", "tokens", "embeddings"))
.setOutputCol("ner_tags")
val ner_converter = NerConverter()
.setOutputCol("ner_tags")

val ner_chunker = NerConverterInternal()
.setInputCols(Array("sentences", "tokens", "ner_tags"))
.setOutputCol("ner_chunks")

val dependency_parser = DependencyParserModel()
.pretrained("dependency_conllu", "en")
.setInputCols(Array("sentences", "pos_tags", "tokens"))
.setOutputCol("dependencies")

// Set a filter on pairs of named entities which will be treated as relation candidates
val re_ner_chunk_filter = RENerChunksFilter()
val events_re_ner_chunk_filter = RENerChunksFilter()
.setInputCols(Array("ner_chunks", "dependencies"))
.setMaxSyntacticDistance(10)
.setOutputCol("re_ner_chunks").setRelationPairs(Array('symptom-date', 'date-procedure', 'delativedate-test', 'test-date'))

// The dataset this model is trained to is sentence-wise.
// This model can also be trained on document-level relations - in which case, while predicting, use "document" instead of "sentence" as input.
val re_model = RelationExtractionDLModel()
.pretrained("redl_date_clinical_biobert", "en", "clinical/models")
.setOutputCol("re_ner_chunks")

val events_re_Model = RelationExtractionDLModel()
.pretrained('redl_date_clinical_biobert', "en", "clinical/models")
.setPredictionThreshold(0.5)
.setInputCols(Array("re_ner_chunks", "sentences"))
.setInputCols(Array("re_ner_chunks", "sentences"))
.setOutputCol("relations")
val pipeline = new Pipeline().setStages(Array(documenter, sentencer, tokenizer, pos_tagger, words_embedder, ner_tagger, ner_converter, dependency_parser, re_ner_chunk_filter, re_model))

val pipeline = new Pipeline().setStages(Array(documenter,sentencer,tokenizer,words_embedder,pos_tagger,events_ner_tagger,ner_chunker,dependency_parser,events_re_ner_chunk_filter,events_re_Model))

val data = Seq("This 73 y/o patient had CT on 1/12/95, with progressive memory and cognitive decline since 8/11/94.").toDF("text")
val result = pipeline.fit(data).transform(data)
Expand Down Expand Up @@ -143,4 +183,4 @@ Relation Recall Precision F1 Support
0 0.738 0.729 0.734 84
1 0.945 0.947 0.946 416
Avg. 0.841 0.838 0.840
```
```