Skip to content

Commit e08079d

Browse files
authored
Merge pull request #10 from AndMu/feature/csvFileSupport
Feature/csv file support
2 parents c6bc149 + f20cb18 commit e08079d

File tree

17 files changed

+234
-73
lines changed

17 files changed

+234
-73
lines changed

README.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ Due to normalisation and standardisation, feature weights discovered in the trai
2828

2929
### To test using lexicon based model
3030
```
31-
Wikiled.Sentiment.ConsoleApp.exe test -Out=[OutPut] -Input=[Folder/File]
32-
Wikiled.Sentiment.ConsoleApp.exe test -Out=[OutPut] -Positive=[Folder/File] -Negative=[Folder/File]
31+
Wikiled.Sentiment.ConsoleApp.exe test -All=[Folder/File] -Out=[OutPut]
32+
Wikiled.Sentiment.ConsoleApp.exe test -Positive=[Folder/File] -Negative=[Folder/File] -Out=[OutPut]
3333
```
3434

3535
### To override lexicon
3636
```
37-
Wikiled.Sentiment.ConsoleApp.exe test -Out=[OutPut] -Input=[Folder/File] -Weights[WeightsFile] -FullWeightReset
37+
Wikiled.Sentiment.ConsoleApp.exe test -All=[Folder/File] -Out=[OutPut] -Weights=[WeightsFile] -FullWeightReset
3838
```
3939

4040
### To train with non default lexicon
@@ -44,7 +44,7 @@ Wikiled.Sentiment.ConsoleApp.exe train -Positive=[Folder/File] -Negative=[Folder
4444

4545
### To Test with trained model
4646
```
47-
Wikiled.Sentiment.ConsoleApp.exe test -Out=[OutPut] -Input=[Folder/File] -Model=[Path to Trained Model]
47+
Wikiled.Sentiment.ConsoleApp.exe test -Out=[OutPut] -All=[Folder/File] -Model=[Path to Trained Model]
4848
```
4949

5050
## Docker service
@@ -53,24 +53,25 @@ An application is also available as a standalone docker based REST service, avai
5353

5454
## Linux support
5555

56-
[Supported OS](https://github.com/dotnet/core/blob/master/release-notes/2.0/2.0-supported-os.md)
56+
[Supported OS](https://github.com/dotnet/core/blob/master/release-notes/3.1/3.1-supported-os.md)
5757

5858
* Install [dotnet core](https://www.microsoft.com/net/download/)
5959
* Retrieve GIT repository source
6060
* dotnet build src/Utilities/Wikiled.Sentiment.ConsoleApp --configuration Release
61-
* dotnet src/Utilities/Wikiled.Sentiment.ConsoleApp/bit/Release/netcoreapp2.0/Wikiled.Sentiment.ConsoleApp.dll test -Input=[path to files] -out=Result -ExtractStyle]
61+
* dotnet src/Utilities/Wikiled.Sentiment.ConsoleApp/bin/Release/netcoreapp3.1/Wikiled.Sentiment.ConsoleApp.dll test -All=[path to files] -out=Result -ExtractStyle
6262

6363
## C# Library
6464

6565

6666
### Training model
6767

6868
```
69-
var factory = MainContainerFactory.Setup()
70-
.Config()
71-
.Splitter();
72-
73-
container = factory.Create().StartSession();
69+
container = MainContainerFactory
70+
.Setup(service)
71+
.SetupLocalCache()
72+
.Config(item => item.SetConfiguration("resources", Path.Combine(TestContext.CurrentContext.TestDirectory, ConfigurationManager.AppSettings["resources"])))
73+
.Splitter()
74+
.Create();
7475
ITrainingClient client = container.GetTraining(Model);
7576
await client.Train(reviews).ConfigureAwait(false);
7677

src/Sentiment/Wikiled.Sentiment.AcceptanceTests/Wikiled.Sentiment.AcceptanceTests.csproj

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,29 @@
1010

1111
<ItemGroup>
1212
<PackageReference Include="Accord.MachineLearning" Version="3.8.0" />
13-
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="3.0.0" />
13+
<PackageReference Include="Microsoft.Extensions.DependencyInjection" Version="3.1.0" />
1414
<PackageReference Include="Moq" Version="4.13.1" />
1515
<PackageReference Include="Newtonsoft.Json" version="12.0.3" />
1616
<PackageReference Include="NLog" version="4.6.8" />
1717
<PackageReference Include="NLog.Extensions.Logging" Version="1.6.1" />
1818
<PackageReference Include="NUnit" version="3.12.0" />
19-
<PackageReference Include="Polly" version="7.1.1" />
19+
<PackageReference Include="Polly" version="7.2.0" />
2020
<PackageReference Include="Snappy.NET" version="1.1.1.8" />
2121
<PackageReference Include="StackExchange.Redis" Version="2.0.601" />
22-
<PackageReference Include="System.Reactive" version="4.2.0" />
23-
<PackageReference Include="Wikiled.Amazon" version="1.1.4" />
22+
<PackageReference Include="System.Reactive" version="4.3.2" />
23+
<PackageReference Include="Wikiled.Amazon" version="1.1.7" />
2424
<PackageReference Include="Wikiled.Arff" version="2.2.24" />
25-
<PackageReference Include="Wikiled.Common" Version="1.1.9" />
25+
<PackageReference Include="Wikiled.Common" Version="1.1.10" />
2626
<PackageReference Include="Wikiled.MachineLearning" version="2.4.25" />
27-
<PackageReference Include="Wikiled.Redis" version="3.5.61" />
27+
<PackageReference Include="Wikiled.Redis" version="3.6.8" />
2828
<PackageReference Include="Wikiled.Text.Analysis" version="1.7.42" />
2929
<packagereference Include="Microsoft.NET.Test.Sdk" Version="16.4.0"></packagereference>
30-
<packagereference Include="NUnit3TestAdapter" Version="3.15.1" />
31-
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="3.0.0" />
32-
<PackageReference Include="Microsoft.Extensions.Logging.Debug" Version="3.0.0" />
30+
<packagereference Include="NUnit3TestAdapter" Version="3.16.0">
31+
<PrivateAssets>all</PrivateAssets>
32+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
33+
</packagereference>
34+
<PackageReference Include="Microsoft.Extensions.Logging.Console" Version="3.1.0" />
35+
<PackageReference Include="Microsoft.Extensions.Logging.Debug" Version="3.1.0" />
3336
</ItemGroup>
3437

3538

src/Sentiment/Wikiled.Sentiment.Analysis.Tests/Wikiled.Sentiment.Analysis.Tests.csproj

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,24 @@
99
</PropertyGroup>
1010

1111
<ItemGroup>
12-
<PackageReference Include="Microsoft.Reactive.Testing" version="4.2.0" />
12+
<PackageReference Include="Microsoft.Reactive.Testing" version="4.3.2" />
1313
<PackageReference Include="Moq" Version="4.13.1" />
1414
<PackageReference Include="Newtonsoft.Json" version="12.0.3" />
1515
<PackageReference Include="NLog" version="4.6.8" />
1616
<PackageReference Include="NUnit" version="3.12.0" />
17-
<PackageReference Include="Polly" version="7.1.1" />
17+
<PackageReference Include="Polly" version="7.2.0" />
1818
<PackageReference Include="StackExchange.Redis" Version="2.0.601" />
19-
<PackageReference Include="System.Reactive" version="4.2.0" />
19+
<PackageReference Include="System.Reactive" version="4.3.2" />
2020
<PackageReference Include="Wikiled.Arff" version="2.2.24" />
21-
<PackageReference Include="Wikiled.Common" Version="1.1.9" />
21+
<PackageReference Include="Wikiled.Common" Version="1.1.10" />
2222
<PackageReference Include="Wikiled.MachineLearning" version="2.4.25" />
23-
<PackageReference Include="Wikiled.Redis" version="3.5.61" />
23+
<PackageReference Include="Wikiled.Redis" version="3.6.8" />
2424
<PackageReference Include="Wikiled.Text.Analysis" version="1.7.42" />
2525
<packagereference Include="Microsoft.NET.Test.Sdk" Version="16.4.0"></packagereference>
26-
<packagereference Include="NUnit3TestAdapter" Version="3.15.1" />
26+
<packagereference Include="NUnit3TestAdapter" Version="3.16.0">
27+
<PrivateAssets>all</PrivateAssets>
28+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
29+
</packagereference>
2730
</ItemGroup>
2831

2932
<ItemGroup>
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.IO;
4+
using System.Linq;
5+
using System.Reactive.Linq;
6+
using System.Threading.Tasks;
7+
using CsvHelper;
8+
using Microsoft.Extensions.Logging;
9+
using Wikiled.Common.Serialization;
10+
using Wikiled.Sentiment.Text.Data.Review;
11+
12+
namespace Wikiled.Sentiment.Analysis.Processing.Persistency
13+
{
14+
public class CsvDataSource : IDataSource
15+
{
16+
private readonly ILogger<CsvDataSource> logger;
17+
18+
private readonly string path;
19+
20+
public CsvDataSource(ILogger<CsvDataSource> logger, string path)
21+
{
22+
this.logger = logger;
23+
this.path = path;
24+
}
25+
26+
public IObservable<DataPair> Load()
27+
{
28+
return GetReviews().ToObservable();
29+
}
30+
31+
public IEnumerable<DataPair> GetReviews()
32+
{
33+
logger.LogInformation("Reading: {0}", path);
34+
if (string.IsNullOrEmpty(path))
35+
{
36+
logger.LogWarning("One of paths is empty");
37+
return new DataPair[] { };
38+
}
39+
40+
return GetReview();
41+
}
42+
43+
44+
private IEnumerable<DataPair> GetReview()
45+
{
46+
logger.LogInformation("Loading {0}", path);
47+
var counter = 0;
48+
using (var streamRead = new StreamReader(path))
49+
using (var csvData = new CsvReader(streamRead))
50+
{
51+
csvData.Read();
52+
csvData.ReadHeader();
53+
csvData.Configuration.MissingFieldFound = null;
54+
var headerTable = csvData.Context.HeaderRecord.ToLookup(item => item, StringComparer.OrdinalIgnoreCase);
55+
while (csvData.Read())
56+
{
57+
counter++;
58+
var id = counter.ToString();
59+
double? stars = null;
60+
SentimentClass? sentimentClass = null;
61+
if (headerTable.Contains("id"))
62+
{
63+
id = csvData.GetField(headerTable["id"].First());
64+
}
65+
66+
if (headerTable.Contains("sentiment"))
67+
{
68+
sentimentClass = csvData.GetField<SentimentClass?>(headerTable["sentiment"].First());
69+
}
70+
71+
string author = null;
72+
if (headerTable.Contains("userid"))
73+
{
74+
author = csvData.GetField(headerTable["userid"].First());
75+
}
76+
77+
if (headerTable.Contains("author"))
78+
{
79+
author = csvData.GetField(headerTable["author"].First());
80+
}
81+
82+
if (headerTable.Contains("stars"))
83+
{
84+
stars = csvData.GetField<double?>(headerTable["stars"].First());
85+
}
86+
87+
if (headerTable.Contains("text"))
88+
{
89+
var text = csvData.GetField(headerTable["text"].First());
90+
var item = new SingleProcessingData(text.SanitizeXmlString());
91+
item.Id = id;
92+
item.Author = author;
93+
item.Stars = stars;
94+
95+
if (stars != null)
96+
{
97+
sentimentClass = stars > 3 ? SentimentClass.Positive : SentimentClass.Negative;
98+
}
99+
100+
yield return new DataPair(sentimentClass, Task.FromResult(item));
101+
}
102+
}
103+
}
104+
}
105+
}
106+
}

src/Sentiment/Wikiled.Sentiment.Analysis/Processing/Persistency/DataLoader.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ public IDataSource Load(IDataSourceConfig source)
3535
return new XmlDataLoader(loggerFactory.CreateLogger<XmlDataLoader>()).LoadOldXml(source.All);
3636
}
3737

38+
if (source.All.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
39+
{
40+
return new CsvDataSource(loggerFactory.CreateLogger<CsvDataSource>(), source.All);
41+
}
42+
3843
logger.LogInformation("Loading {0} as JSON", source.All);
3944
var data = new JsonDataSource(loggerFactory.CreateLogger<JsonDataSource>(), source.All);
4045
return data;

src/Sentiment/Wikiled.Sentiment.Analysis/Wikiled.Sentiment.Analysis.csproj

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,17 @@
2222
<ItemGroup>
2323
<PackageReference Include="Knuppe.SharpNL" Version="1.3.0" />
2424
<PackageReference Include="Accord.MachineLearning" Version="3.8.0" />
25-
<PackageReference Include="morelinq" Version="3.2.0" />
26-
<PackageReference Include="CsvHelper" version="12.2.1" />
25+
<PackageReference Include="morelinq" Version="3.3.2" />
26+
<PackageReference Include="CsvHelper" version="12.2.3" />
2727
<PackageReference Include="Newtonsoft.Json" version="12.0.3" />
28-
<PackageReference Include="Polly" version="7.1.1" />
28+
<PackageReference Include="Polly" version="7.2.0" />
2929
<PackageReference Include="StackExchange.Redis" Version="2.0.601" />
30-
<PackageReference Include="System.Reactive" version="4.2.0" />
30+
<PackageReference Include="System.Reactive" version="4.3.2" />
3131
<PackageReference Include="System.Threading.Tasks.Extensions" Version="4.5.3" />
3232
<PackageReference Include="Wikiled.Arff" version="2.2.24" />
33-
<PackageReference Include="Wikiled.Common" Version="1.1.9" />
33+
<PackageReference Include="Wikiled.Common" Version="1.1.10" />
3434
<PackageReference Include="Wikiled.MachineLearning" version="2.4.25" />
35-
<PackageReference Include="Wikiled.Redis" version="3.5.61" />
35+
<PackageReference Include="Wikiled.Redis" version="3.6.8" />
3636
<PackageReference Include="Wikiled.Text.Analysis" version="1.7.42" />
3737
<PackageReference Include="Wikiled.Text.Inquirer" version="1.2.25" />
3838
</ItemGroup>

src/Sentiment/Wikiled.Sentiment.Integration.Tests/Parser/RedisDocumentCacheTests.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using NUnit.Framework;
99
using Wikiled.Common.Logging;
1010
using Wikiled.Common.Utilities.Modules;
11+
using Wikiled.Common.Utilities.Serialization;
1112
using Wikiled.Redis.Config;
1213
using Wikiled.Redis.Logic;
1314
using Wikiled.Sentiment.TestLogic.Shared.Helpers;
@@ -39,8 +40,9 @@ public void Setup()
3940
redis = new RedisInside.Redis(i => i.Port(6666).LogTo(item => log.LogDebug(item)));
4041
IServiceCollection service = new ServiceCollection();
4142
service.RegisterModule(new RedisServerModule(new RedisConfiguration("localhost", 6666) { ServiceName = "Test" }));
42-
link = service.BuildServiceProvider().GetService<IRedisLink>();
43-
instance = new RedisDocumentCache(new NullLogger<RedisDocumentCache>(), POSTaggerType.Simple, link, local);
43+
var provider = service.BuildServiceProvider();
44+
link = provider.GetService<IRedisLink>();
45+
instance = new RedisDocumentCache(new NullLogger<RedisDocumentCache>(), POSTaggerType.Simple, link, local, provider.GetService<IJsonSerializer>());
4446
}
4547

4648
[TearDown]
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using System.IO;
2+
using Microsoft.Extensions.Logging.Abstractions;
3+
using NUnit.Framework;
4+
using System.Linq;
5+
using Wikiled.Sentiment.Analysis.Processing.Persistency;
6+
7+
namespace Wikiled.Sentiment.Integration.Tests.Processing.Persistency
8+
{
9+
[TestFixture]
10+
public class CsvDataSourceTests
11+
{
12+
[Test]
13+
public void GetReviews()
14+
{
15+
var source = new CsvDataSource(new NullLogger<CsvDataSource>(), Path.Combine(TestContext.CurrentContext.TestDirectory, @"Processing\Persistency\data.csv"));
16+
var reviews = source.GetReviews().ToArray();
17+
Assert.AreEqual(4, reviews.Length);
18+
Assert.AreEqual("561697961110167552", reviews[0].Data.Result.Id);
19+
Assert.AreEqual(1, reviews[0].Data.Result.Stars);
20+
Assert.AreEqual("putting apple's record-breaking quarter into context: $aapl URL_URL via @forbestech", reviews[0].Data.Result.Text);
21+
}
22+
}
23+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Id,Time,UserId,Company,Text,Stars
2+
561697961110167552,01/02/2015 01:30:04,2265020497,AAPL,putting apple's record-breaking quarter into context: $aapl URL_URL via @forbestech,1
3+
561731412915929088,01/02/2015 03:42:59,84165902,AAPL,"""@wsj: apple plans to start shipping apple watch in april: URL_URL $aapl URL_URL"""
4+
561940570667769856,01/02/2015 17:34:07,82142636,NKE,"nike has tested the $92 level 3 times this year, looks vulnerable to break now. $nke URL_URL"
5+
561982725846859776,01/02/2015 20:21:37,82142636,GOOG,"""@howardlindzon: me too (long) @sexonthebeach: $goog i think google goes back to 600."""

src/Sentiment/Wikiled.Sentiment.Integration.Tests/Wikiled.Sentiment.Integration.Tests.csproj

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,18 @@
1414
<PackageReference Include="Newtonsoft.Json" version="12.0.3" />
1515
<PackageReference Include="NLog" version="4.6.8" />
1616
<PackageReference Include="NUnit" version="3.12.0" />
17-
<PackageReference Include="Polly" version="7.1.1" />
17+
<PackageReference Include="Polly" version="7.2.0" />
1818
<PackageReference Include="StackExchange.Redis" Version="2.0.601" />
19-
<PackageReference Include="System.Reactive" version="4.2.0" />
19+
<PackageReference Include="System.Reactive" version="4.3.2" />
2020
<PackageReference Include="Wikiled.Arff" version="2.2.24" />
21-
<PackageReference Include="Wikiled.Redis" version="3.5.61" />
22-
<PackageReference Include="Wikiled.RedisInside" Version="3.2.101.34" />
21+
<PackageReference Include="Wikiled.Redis" version="3.6.8" />
22+
<PackageReference Include="Wikiled.RedisInside" Version="3.2.101.43" />
2323
<PackageReference Include="Wikiled.Text.Analysis" version="1.7.42" />
2424
<packagereference Include="Microsoft.NET.Test.Sdk" Version="16.4.0"></packagereference>
25-
<packagereference Include="NUnit3TestAdapter" Version="3.15.1" />
25+
<packagereference Include="NUnit3TestAdapter" Version="3.16.0">
26+
<PrivateAssets>all</PrivateAssets>
27+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
28+
</packagereference>
2629
</ItemGroup>
2730

2831
<ItemGroup>
@@ -35,6 +38,9 @@
3538
<None Update="NLog.config">
3639
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
3740
</None>
41+
<None Update="Processing\Persistency\data.csv">
42+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
43+
</None>
3844
</ItemGroup>
3945

4046

0 commit comments

Comments
 (0)