Skip to content

SDK Annotate

André Santos edited this page Nov 9, 2016 · 7 revisions

If you do not want to use the Annotation CLI tool, it is also straightforward to annotate a document programmatically.

The following source code snippet shows how to annotate a document by creating a processing pipeline and using the data provided in the "example" folder.

// Annotates a single raw-text document with two dictionary matchers and one
// machine-learning model, then writes the result to an A1 file.
//
// The parser and both streams are released in finally blocks, so they are
// closed even when a step in between throws (e.g. a missing dictionary or
// model file, or a failure while the pipeline is running).

// Set files
String documentFile = "example/annotate/in/22528326.txt";
String outputFile = "example/annotate/out/22528326.a1";

// Set resources
String dictionary1File = "example/dictionaries/Body_Part_Organ_or_Organ_Component_T023_ANAT.tsv";
String dictionary2File = "example/dictionaries/Disease_or_Syndrome_T047_DISO.tsv";
String modelFile = "example/models/prge/prge.properties";

// Create reader
Reader reader = new RawReader();

// Create and launch parser; everything after launch() runs inside the try
// block so that parser.close() is always reached.
Parser parser = new GDepParser(ParserLanguage.ENGLISH, ParserLevel.CHUNKING,
        new LingpipeSentenceSplitter(), false).launch();
try {
    // Create NLP
    NLP nlp = new NLP(parser);

    // Create dictionary matchers
    List<String> dictionary1Lines = FileUtils.readLines(new File(dictionary1File));
    Dictionary dictionary1 = VariantMatcherLoader.loadDictionaryFromLines(dictionary1Lines);
    List<String> dictionary2Lines = FileUtils.readLines(new File(dictionary2File));
    Dictionary dictionary2 = VariantMatcherLoader.loadDictionaryFromLines(dictionary2Lines);

    DictionaryHybrid dictionaryMatcher1 = new DictionaryHybrid(dictionary1);
    DictionaryHybrid dictionaryMatcher2 = new DictionaryHybrid(dictionary2);

    // Create machine-learning model matcher
    MLModel model = new MLModel("prge", new File(modelFile));
    model.initialize();
    MLHybrid mlModelMatcher = new MLHybrid(model.getCrf(), "prge");

    // Create Writer
    Writer writer = new A1Writer();

    // Set document stream
    InputStream documentStream = new FileInputStream(documentFile);
    try {
        // Run pipeline to get annotations; modules are added in execution order:
        // reader -> NLP -> dictionary matchers -> ML matcher -> writer.
        Pipeline pipeline = new DefaultPipeline()
                .add(reader)
                .add(nlp)
                .add(dictionaryMatcher1)
                .add(dictionaryMatcher2)
                .add(mlModelMatcher)
                .add(writer);

        // run(...) returns a list of output streams; the first one is used here.
        OutputStream outputStream = pipeline.run(documentStream).get(0);
        try {
            // Write annotations to output file
            FileUtils.writeStringToFile(new File(outputFile), outputStream.toString());
        } finally {
            // Close output stream
            outputStream.close();
        }
    } finally {
        // Close document stream
        documentStream.close();
    }
} finally {
    // Close parser
    parser.close();
}
Clone this wiki locally