-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSvmZoneClassifier.cs
54 lines (45 loc) · 2.08 KB
/
SvmZoneClassifier.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
using Accord.IO;
using Accord.MachineLearning.VectorMachines;
using Accord.Statistics.Kernels;
using System;
using System.IO;
using System.Linq;
using UglyToad.PdfPig;
using UglyToad.PdfPig.DocumentLayoutAnalysis.PageSegmenter;
namespace PdfPigSvmRegionClassifier
{
/// <summary>
/// Console-oriented entry points that load a serialized
/// <c>MulticlassSupportVectorMachine&lt;Gaussian&gt;</c> and either hand it to
/// <c>Trainer.Evaluate</c> or use it to label the Docstrum blocks of a PDF.
/// </summary>
internal class SvmZoneClassifier
{
    /// <summary>
    /// Name of the GZip-compressed serialized model file looked up inside the model folder.
    /// </summary>
    private const string ModelFileName = "model.gz";

    /// <summary>
    /// Loads the model stored in <paramref name="modelFolder"/> and passes it,
    /// together with <paramref name="dataFolder"/>, to <c>Trainer.Evaluate</c>.
    /// </summary>
    /// <param name="modelFolder">Folder that contains the <c>model.gz</c> file.</param>
    /// <param name="dataFolder">Forwarded unchanged to <c>Trainer.Evaluate</c>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="modelFolder"/> is null.</exception>
    public static void Evaluate(string modelFolder, string dataFolder)
    {
        if (modelFolder == null)
        {
            throw new ArgumentNullException(nameof(modelFolder));
        }

        var svm = LoadModel(modelFolder);
        Trainer.Evaluate(svm, dataFolder);
    }

    /// <summary>
    /// Loads the model stored in <paramref name="trainingFolder"/>, opens the PDF at
    /// <paramref name="pdfPath"/> and, for every page that has words, writes the predicted
    /// category name and the text of each Docstrum block to the console.
    /// After each such page the method waits for a key press when console input is interactive.
    /// </summary>
    /// <param name="trainingFolder">Folder that contains the <c>model.gz</c> file.</param>
    /// <param name="pdfPath">Path of the PDF document to classify.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="trainingFolder"/> or <paramref name="pdfPath"/> is null.
    /// </exception>
    public static void TestClassification(string trainingFolder, string pdfPath)
    {
        // Validate both paths before the model is deserialized so bad input fails fast.
        if (trainingFolder == null)
        {
            throw new ArgumentNullException(nameof(trainingFolder));
        }

        if (pdfPath == null)
        {
            throw new ArgumentNullException(nameof(pdfPath));
        }

        var svm = LoadModel(trainingFolder);

        using (var document = PdfDocument.Open(pdfPath))
        {
            // GetPage is called with 1-based page numbers.
            for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
            {
                var page = document.GetPage(pageNumber);

                // Materialize once: the sequence is both counted and handed to the
                // segmenter, which would otherwise enumerate it twice.
                var words = page.GetWords().ToList();
                if (words.Count == 0)
                {
                    continue;
                }

                var blocks = DocstrumBoundingBoxes.Instance.GetBlocks(words);
                foreach (var block in blocks)
                {
                    var letters = block.TextLines.SelectMany(line => line.Words).SelectMany(word => word.Letters);
                    var paths = FeatureHelper.GetPathsInside(block.BoundingBox, page.ExperimentalAccess.Paths);
                    var images = FeatureHelper.GetImagesInside(block.BoundingBox, page.GetImages());
                    var features = FeatureHelper.GetFeatures(page, block.BoundingBox, letters, paths, images);

                    var category = svm.Decide(features);

                    Console.WriteLine(FeatureHelper.Categories[category]);
                    Console.WriteLine(block.Text);
                    Console.WriteLine();
                }

                // Console.ReadKey throws InvalidOperationException when input is redirected
                // (piped input, CI), so only pause between pages on an interactive console.
                if (!Console.IsInputRedirected)
                {
                    Console.ReadKey();
                }
            }
        }
    }

    /// <summary>
    /// Deserializes the GZip-compressed model file located in <paramref name="folder"/>.
    /// </summary>
    private static MulticlassSupportVectorMachine<Gaussian> LoadModel(string folder)
    {
        var modelPath = Path.Combine(folder, ModelFileName);
        return Serializer.Load<MulticlassSupportVectorMachine<Gaussian>>(modelPath, SerializerCompression.GZip);
    }
}
}