-
Notifications
You must be signed in to change notification settings - Fork 23
SDK Annotate
André Santos edited this page Nov 9, 2016
·
7 revisions
If users do not want to take advantage of the Annotation CLI tool, it is also straightforward to annotate a document programmatically.
The following source code snippet shows how to annotate a document by creating a processing pipeline and using the data provided in the "example" folder.
// Example: annotate a single raw-text document with two dictionary matchers
// and one machine-learning model, then write the result to an A1 file.
// Every resource that is opened (parser, input stream, output stream) is
// released in a finally block, so a failure in any step does not leak it.

// Set files
String documentFile = "example/annotate/in/22528326.txt";
String outputFile = "example/annotate/out/22528326.a1";

// Set resources
String dictionary1File = "example/dictionaries/Body_Part_Organ_or_Organ_Component_T023_ANAT.tsv";
String dictionary2File = "example/dictionaries/Disease_or_Syndrome_T047_DISO.tsv";
String modelFile = "example/models/prge/prge.properties";

// Create reader
Reader reader = new RawReader();

// Create parser. It is launched here, so everything that follows runs inside
// try/finally to guarantee that parser.close() is always reached.
Parser parser = new GDepParser(ParserLanguage.ENGLISH, ParserLevel.CHUNKING,
        new LingpipeSentenceSplitter(), false).launch();
try {
    // Create NLP
    NLP nlp = new NLP(parser);

    // Create dictionary matchers
    List<String> dictionary1Lines = FileUtils.readLines(new File(dictionary1File));
    Dictionary dictionary1 = VariantMatcherLoader.loadDictionaryFromLines(dictionary1Lines);
    List<String> dictionary2Lines = FileUtils.readLines(new File(dictionary2File));
    Dictionary dictionary2 = VariantMatcherLoader.loadDictionaryFromLines(dictionary2Lines);
    DictionaryHybrid dictionaryMatcher1 = new DictionaryHybrid(dictionary1);
    DictionaryHybrid dictionaryMatcher2 = new DictionaryHybrid(dictionary2);

    // Create machine-learning model matcher
    MLModel model = new MLModel("prge", new File(modelFile));
    model.initialize();
    MLHybrid mlModelMatcher = new MLHybrid(model.getCrf(), "prge");

    // Create Writer
    Writer writer = new A1Writer();

    // Set document stream
    InputStream documentStream = new FileInputStream(documentFile);
    try {
        // Run pipeline to get annotations
        Pipeline pipeline = new DefaultPipeline()
                .add(reader)
                .add(nlp)
                .add(dictionaryMatcher1)
                .add(dictionaryMatcher2)
                .add(mlModelMatcher)
                .add(writer);

        // The pipeline returns a list of output streams; this example reads
        // the first one, matching the single writer added above.
        OutputStream outputStream = pipeline.run(documentStream).get(0);
        try {
            // Write annotations to output file
            FileUtils.writeStringToFile(new File(outputFile), outputStream.toString());
        } finally {
            // Close output stream even if writing the file failed
            outputStream.close();
        }
    } finally {
        // Close input stream even if the pipeline failed
        documentStream.close();
    }
} finally {
    // Close parser regardless of how the steps above ended
    parser.close();
}