Skip to content

Commit cc8ad7a

Browse files
authored
Merge pull request #2 from Oceania2018/master
merge newestcode
2 parents 042cb91 + a8c5242 commit cc8ad7a

24 files changed

+576
-491
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace BotSharp.Algorithm.Bayes
6+
{
7+
/// <summary>
/// Empty type: no fields, properties or methods are declared.
/// NOTE(review): presumably a placeholder for a Bernoulli naive Bayes classifier — confirm.
/// </summary>
public class BernoulliNaiveBayes
{
}
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Text;
4+
5+
namespace BotSharp.Algorithm.Bayes
6+
{
7+
/// <summary>
/// Empty type: no fields, properties or methods are declared.
/// NOTE(review): presumably a placeholder for a Gaussian naive Bayes classifier — confirm.
/// </summary>
public class GaussianNaiveBayes
{
}
10+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* BotSharp.Algorithm
3+
* Copyright (C) 2018 Haiping Chen
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
*/
18+
19+
using BotSharp.Algorithm.Estimators;
20+
using BotSharp.Algorithm.Features;
21+
using BotSharp.Algorithm.Statistics;
22+
using System;
23+
using System.Collections.Generic;
24+
using System.Linq;
25+
using System.Text;
26+
27+
namespace BotSharp.Algorithm.Bayes
28+
{
29+
/// <summary>
/// Computes additively smoothed prior, conditional and posterior probabilities
/// from a labelled feature set.
/// https://en.wikipedia.org/wiki/Bayes%27_theorem
/// </summary>
public class MultinomiaNaiveBayes
{
    /// <summary>
    /// Label distribution. Each entry is read through its <c>Value</c> (label)
    /// and <c>Freq</c> (count) members.
    /// </summary>
    public List<Probability> LabelDist { get; set; }

    /// <summary>
    /// Training samples: <c>Item1</c> is the label, <c>Item2</c> the feature vector.
    /// </summary>
    public List<Tuple<string, double[]>> FeatureSet { get; set; }

    /// <summary>
    /// Additive smoothing parameter used by the prior and conditional estimates.
    /// </summary>
    private double alpha { get; set; }

    /// <summary>
    /// Creates an instance with the given smoothing parameter.
    /// </summary>
    /// <param name="alpha">Additive smoothing parameter; defaults to 0.5.</param>
    public MultinomiaNaiveBayes(double alpha = 0.5)
    {
        this.alpha = alpha;
    }

    /// <summary>
    /// Smoothed prior probability of a label:
    /// (Nyk + alpha) / (N + k * alpha), where N is the number of samples in
    /// <see cref="FeatureSet"/>, k the number of entries in <see cref="LabelDist"/>
    /// and Nyk the <c>Freq</c> of the first entry whose <c>Value</c> equals Y.
    /// </summary>
    /// <param name="Y">Label to look up.</param>
    /// <returns>The smoothed prior as a plain (non-logarithmic) probability.</returns>
    /// <exception cref="InvalidOperationException">No entry of <see cref="LabelDist"/> matches Y.</exception>
    public double CalPriorProb(string Y)
    {
        int N = FeatureSet.Count;
        int k = LabelDist.Count;
        int Nyk = LabelDist.First(entry => entry.Value == Y).Freq;

        return (Nyk + alpha) / (N + k * alpha);
    }

    /// <summary>
    /// Natural logarithm of the smoothed conditional probability that feature
    /// column x equals the given value among the samples labelled Y.
    /// </summary>
    /// <param name="x">Zero-based index of the feature column.</param>
    /// <param name="Y">Label selecting the samples to count over.</param>
    /// <param name="feature">Feature value to match (exact equality).</param>
    /// <returns>ln((Nykx + alpha) / (Nyk + n * alpha)).</returns>
    /// <exception cref="ArgumentOutOfRangeException">No sample in <see cref="FeatureSet"/> carries label Y.</exception>
    public double CalCondProb(int x, string Y, double feature)
    {
        // Count directly over the samples instead of copying them into a
        // temporary matrix on every call.
        int Nyk = 0;   // samples labelled Y
        int Nykx = 0;  // of those, samples whose column x equals the value
        foreach (var sample in FeatureSet)
        {
            if (sample.Item1 != Y)
            {
                continue;
            }

            Nyk++;
            if (sample.Item2[x] == feature)
            {
                Nykx++;
            }
        }

        if (Nyk == 0)
        {
            // Without this guard the denominator below would be zero.
            throw new ArgumentOutOfRangeException(nameof(Y), Y, "No training samples carry this label.");
        }

        // NOTE(review): the smoothing term uses the label's sample count as n.
        // Additive smoothing normally uses the number of distinct values the
        // feature can take; kept unchanged here — confirm which is intended.
        int n = Nyk;

        return Math.Log((Nykx + alpha) / (Nyk + n * alpha));
    }

    /// <summary>
    /// Unnormalised posterior P(Y) * P(X1|Y) * ... * P(Xn|Y).
    /// From P(X, Y) = P(Y|X)P(X) = P(X|Y)P(Y) it follows that
    /// P(Y|X) = P(Y)P(X|Y)/P(X); the division by P(X) is not performed here.
    /// </summary>
    /// <param name="Y">Label being scored.</param>
    /// <param name="features">Feature vector of the sample being scored.</param>
    /// <param name="priorProb">Plain (non-logarithmic) prior probability of Y.</param>
    /// <param name="condProbDictionary">
    /// Log conditional probabilities keyed by "{Y} f{index} {value}".
    /// Assumed to hold natural logarithms as produced by <see cref="CalCondProb"/>.
    /// </param>
    /// <returns>exp(ln(priorProb) + sum of the looked-up log conditionals).</returns>
    /// <exception cref="KeyNotFoundException">A feature key is missing from the dictionary.</exception>
    public double CalPosteriorProb(string Y, double[] features, double priorProb, Dictionary<string, double> condProbDictionary)
    {
        // Accumulate in natural-log space: the prior has to be logged before
        // it is added to the log conditionals.
        double logPosterior = Math.Log(priorProb);

        for (int x = 0; x < features.Length; x++)
        {
            // The key text must match whatever built the dictionary, so the
            // original formatting of the feature value is kept as is.
            string key = $"{Y} f{x} {features[x]}";
            logPosterior += condProbDictionary[key];
        }

        // Leave log space with the base that matches Math.Log.
        return Math.Exp(logPosterior);
    }
}
119+
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
using BotSharp.Algorithm.Statistics;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Text;
5+
6+
namespace BotSharp.Algorithm.Bayes
7+
{
8+
/// <summary>
/// Plain data holder with three settable properties and no behaviour.
/// NOTE(review): presumably the persisted form of a trained multinomial naive
/// Bayes model — confirm against the code that reads and writes it.
/// </summary>
public class MultinomiaNaiveBayesModel
{
    /// <summary>List of label probability entries.</summary>
    public List<Probability> LabelDist { get; set; }

    /// <summary>Conditional probability values keyed by string.</summary>
    public Dictionary<string, double> CondProbDictionary { get; set; }

    /// <summary>List of string values. NOTE(review): meaning not visible here — confirm.</summary>
    public List<string> Values { get; set; }
}
16+
}

BotSharp.Algorithm/Bayes/NaiveBayes.cs

Lines changed: 0 additions & 65 deletions
This file was deleted.

BotSharp.Algorithm/Estimators/Lidstone.cs renamed to BotSharp.Algorithm/Estimators/AdditiveSmoothing.cs

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,12 @@ namespace BotSharp.Algorithm.Estimators
3131
/// https://en.wikipedia.org/wiki/Additive_smoothing
3232
/// Used as Multinomial Naive Bayes
3333
/// </summary>
34-
public class Lidstone : IEstimator
34+
public class AdditiveSmoothing : IEstimator
3535
{
3636
/// <summary>
37-
/// α > 0 is the smoothing parameter
37+
/// 0 < α < 1 is the smoothing parameter for Lidstone smoothing
38+
/// α = 1 is Laplace
39+
/// α = 0 means no smoothing (note: Prob replaces an α of 0 with 0.5)
3840
/// </summary>
3941
public double Alpha { get; set; }
4042

@@ -62,5 +64,24 @@ public double Prob(List<Probability> dist, string sample)
6264

6365
return (x + Alpha) / (_N + Alpha * _d);
6466
}
67+
68+
/// <summary>
/// Additively smoothed probability of <paramref name="sample"/> within a
/// weighted distribution: (x + Alpha) / (N + Alpha * d), where x is the weight
/// of the matching entry (0 when absent), N the sum of all weights and d the
/// number of entries.
/// </summary>
/// <param name="dist">Entries of (name, weight).</param>
/// <param name="sample">Name of the entry to estimate.</param>
/// <returns>The smoothed estimate.</returns>
public double Prob(List<Tuple<string, double>> dist, string sample)
{
    // An Alpha of exactly zero is replaced by 0.5; the new value stays on the instance.
    if (Alpha == 0)
    {
        Alpha = 0.5D;
    }

    // Weight observed for the requested sample, zero when it is not listed.
    Tuple<string, double> match = dist.Find(entry => entry.Item1 == sample);
    double observed = match != null ? match.Item2 : 0D;

    // Total weight over every entry (the N trials).
    double total = 0D;
    foreach (Tuple<string, double> entry in dist)
    {
        total += entry.Item2;
    }

    int categories = dist.Count;

    return (observed + Alpha) / (total + Alpha * categories);
}
6586
}
6687
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
using BotSharp.Core.Abstractions;
2+
using BotSharp.Core.Agents;
3+
using BotSharp.NLP;
4+
using BotSharp.NLP.Classify;
5+
using BotSharp.NLP.Txt2Vec;
6+
using Microsoft.Extensions.Configuration;
7+
using Newtonsoft.Json.Linq;
8+
using System;
9+
using System.Collections.Generic;
10+
using System.IO;
11+
using System.Linq;
12+
using System.Text;
13+
using System.Threading.Tasks;
14+
15+
namespace BotSharp.Core.Engines.BotSharp
16+
{
17+
/// <summary>
/// Pipeline step that trains a <c>NaiveBayesClassifier</c> on the sentences of a
/// document and uses it to label the first sentence of a document.
/// </summary>
public class BotSharpNBayesClassifier : INlpTrain, INlpPredict
{
    public IConfiguration Configuration { get; set; }

    /// <summary>Pipe settings; <c>ModelDir</c> is where the model file path is rooted.</summary>
    public PipeSettings Settings { get; set; }

    /// <summary>
    /// Trains the classifier on every sentence of <paramref name="doc"/>, using each
    /// sentence's intent label, text and tokens, and records the model file name
    /// and a "compiled at" stamp on <paramref name="meta"/>.
    /// </summary>
    /// <returns>Always <c>true</c>. The body contains no await.</returns>
    public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
    {
        meta.Model = "classification-nb.model";
        string modelFileName = Path.Combine(Settings.ModelDir, meta.Model);

        var options = new ClassifyOptions { ModelFilePath = modelFileName };
        var classifier = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(options, SupportedLanguage.English);

        // One training sentence per document sentence.
        var sentences = (from source in doc.Sentences
                         select new Sentence
                         {
                             Label = source.Intent.Label,
                             Text = source.Text,
                             Words = source.Tokens
                         }).ToList();

        classifier.Train(sentences);

        Console.WriteLine($"Saved model to {modelFileName}");
        meta.Meta = new JObject { ["compiled at"] = "Sep 12, 2018" };

        return true;
    }

    /// <summary>
    /// Classifies the first sentence of <paramref name="doc"/> with the model file
    /// named by <paramref name="meta"/> and stores the first returned
    /// (label, confidence) pair as that sentence's intent.
    /// </summary>
    /// <returns>Always <c>true</c>. The body contains no await.</returns>
    public async Task<bool> Predict(Agent agent, NlpDoc doc, PipeModel meta)
    {
        string modelPath = Path.Combine(Settings.ModelDir, meta.Model);
        var options = new ClassifyOptions { ModelFilePath = modelPath };
        var classifier = new ClassifierFactory<NaiveBayesClassifier, SentenceFeatureExtractor>(options, SupportedLanguage.English);

        // Only the first sentence of the document is classified.
        var first = doc.Sentences.First();
        var sentence = new Sentence
        {
            Text = first.Text,
            Words = first.Tokens
        };

        var result = classifier.Classify(sentence);
        var best = result.First();

        doc.Sentences[0].Intent = new TextClassificationResult
        {
            Classifier = "BotSharpNBayesClassifier",
            Label = best.Item1,
            Confidence = (decimal)best.Item2
        };

        return true;
    }
}
76+
}

BotSharp.Core/Engines/BotSharp/BotSharpSVMClassifier.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
129129
ClassifyOptions classifyOptions = new ClassifyOptions();
130130
classifyOptions.ModelFilePath = Path.Combine(Settings.ModelDir, "svm_classifier_model");
131131
classifyOptions.TransformFilePath = Path.Combine(Settings.ModelDir, "transform_obj_data");
132-
svmClassifier.Train(featureSetList, classifyOptions);
132+
// svmClassifier.Train(featureSetList, classifyOptions);
133133

134134
meta.Meta = new JObject();
135135
meta.Meta["compiled at"] = "Aug 31, 2018";

BotSharp.Core/Engines/BotSharp/BotSharpTokenizer.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using BotSharp.Core.Abstractions;
22
using BotSharp.Core.Agents;
3+
using BotSharp.Core.Intents;
34
using BotSharp.NLP;
45
using BotSharp.NLP.Tokenize;
56
using Microsoft.Extensions.Configuration;
@@ -14,14 +15,12 @@ public class BotSharpTokenizer : INlpTrain, INlpPredict
1415
{
1516
public IConfiguration Configuration { get; set; }
1617
public PipeSettings Settings { get; set; }
17-
private TokenizerFactory<RegexTokenizer> _tokenizer;
18+
private TokenizerFactory<TreebankTokenizer> _tokenizer;
1819

1920
public BotSharpTokenizer()
2021
{
21-
_tokenizer = new TokenizerFactory<RegexTokenizer>(new TokenizationOptions
22+
_tokenizer = new TokenizerFactory<TreebankTokenizer>(new TokenizationOptions
2223
{
23-
Pattern = RegexTokenizer.WORD_PUNC,
24-
SpecialWords = new List<string> { "'s" }
2524
}, SupportedLanguage.English);
2625
}
2726

@@ -48,7 +47,8 @@ public async Task<bool> Train(Agent agent, NlpDoc doc, PipeModel meta)
4847
doc.Sentences.Add(new NlpDocSentence
4948
{
5049
Tokens = _tokenizer.Tokenize(say.Text),
51-
Text = say.Text
50+
Text = say.Text,
51+
Intent = new TextClassificationResult { Label = say.Intent }
5252
});
5353
});
5454

0 commit comments

Comments
 (0)