Skip to content

Commit e17955d

Browse files
committed
解耦了下载器、数据管道、解析器
1 parent 0d27df1 commit e17955d

File tree

232 files changed

+1782
-2578
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

232 files changed

+1782
-2578
lines changed

Design.zh-CN.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# DESIGN
2+
3+
在此重大更新之前, 下载器 & 选择器 & 数据管理是耦合在一起的, 经过许久思考我决定把它们解耦出来, 这样用户可以自由的选择他们喜爱的组件. 比如说, 下载器使用框架自带的Downloader、WebClientApi、苏菲的HttpHelper等; 解析器可以使用框架自带的Extraction、AngleSharp等;
4+
5+
### Downloader
6+
7+
The downloader is an independent module that helps users download data from a target website. There are many details to be aware of; see below:
8+
9+
1. There are two ways to set a cookie. One is to call the AddCookie method on the downloader; it adds the cookie to the CookieContainer and therefore affects every request.
10+
The other is to set the Cookie header on the request; the result is the combination of your Cookie header and the cookies in the CookieContainer.
11+
2. The CookieInjector in the downloader is invoked only once, and it injects cookies into the CookieContainer.
12+
13+
### Scheduler
14+
15+
#### Request hash
16+
17+
1. Requests with the same URL but different headers are different requests, so headers are a factor
18+
2. A request has a CycleRetryTimes property; if its values differ, then the requests are different. The Depth property is not
19+
a factor.
20+
21+

DotnetSpider.sln

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ Microsoft Visual Studio Solution File, Format Version 12.00
33
# Visual Studio 15
44
VisualStudioVersion = 15.0.27703.2042
55
MinimumVisualStudioVersion = 10.0.40219.1
6-
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Common", "src\DotnetSpider.Common\DotnetSpider.Common.csproj", "{F1C6C272-A72A-4A5B-95EE-846643A29A3A}"
7-
EndProject
86
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Extraction", "src\DotnetSpider.Extraction\DotnetSpider.Extraction.csproj", "{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}"
97
EndProject
108
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.HtmlAgilityPack.Css", "src\DotnetSpider.HtmlAgilityPack.Css\DotnetSpider.HtmlAgilityPack.Css.csproj", "{38DFF949-761C-4DC1-ADC6-D3F535E84AEF}"
@@ -36,11 +34,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
3634
.editorconfig = .editorconfig
3735
.gitignore = .gitignore
3836
.travis.yml = .travis.yml
37+
Design.md = Design.md
38+
Design.zh-CN.md = Design.zh-CN.md
3939
DistributeDesign.md = DistributeDesign.md
4040
publishToNuget.bat = publishToNuget.bat
4141
README.md = README.md
4242
runtests.sh = runtests.sh
43-
Design.md = Design.md
4443
EndProjectSection
4544
EndProject
4645
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Worker", "src\DotnetSpider.Worker\DotnetSpider.Worker.csproj", "{C416B779-5018-42AF-A1A5-98186389CCED}"
@@ -49,18 +48,14 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Migrator", "sr
4948
EndProject
5049
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Broker.Test", "src\DotnetSpider.Broker.Test\DotnetSpider.Broker.Test.csproj", "{6CAEECB0-0BD0-4A32-B057-99C7DADE3F4C}"
5150
EndProject
52-
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DotnetSpider.Broker", "src\DotnetSpider.Broker\DotnetSpider.Broker.csproj", "{AAD552D8-0D0A-43B0-9C5D-E542AA8998CE}"
51+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotnetSpider.Broker", "src\DotnetSpider.Broker\DotnetSpider.Broker.csproj", "{AAD552D8-0D0A-43B0-9C5D-E542AA8998CE}"
5352
EndProject
5453
Global
5554
GlobalSection(SolutionConfigurationPlatforms) = preSolution
5655
Debug|Any CPU = Debug|Any CPU
5756
Release|Any CPU = Release|Any CPU
5857
EndGlobalSection
5958
GlobalSection(ProjectConfigurationPlatforms) = postSolution
60-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
61-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Debug|Any CPU.Build.0 = Debug|Any CPU
62-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Release|Any CPU.ActiveCfg = Release|Any CPU
63-
{F1C6C272-A72A-4A5B-95EE-846643A29A3A}.Release|Any CPU.Build.0 = Release|Any CPU
6459
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
6560
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Debug|Any CPU.Build.0 = Debug|Any CPU
6661
{C5A68E4D-E9B4-4B2D-B198-74FA88C8CA22}.Release|Any CPU.ActiveCfg = Release|Any CPU

src/DotnetSpider.Broker.Test/BaseTest.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
using DotnetSpider.Broker.Data;
22
using DotnetSpider.Broker.Hubs;
3-
using Microsoft.AspNetCore.Builder.Internal;
43
using Microsoft.EntityFrameworkCore;
54
using Microsoft.Extensions.Configuration;
65
using Microsoft.Extensions.DependencyInjection;
76
using System;
8-
using System.Collections.Generic;
9-
using System.Data;
10-
using System.Data.SqlClient;
117
using System.IO;
12-
using System.Text;
138
using DotnetSpider.Broker.Services;
149

1510
namespace DotnetSpider.Broker.Test

src/DotnetSpider.Broker.Test/WorkerServiceTest.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
using DotnetSpider.Broker.Data;
2-
using DotnetSpider.Broker.Hubs;
32
using DotnetSpider.Broker.Services;
43
using Microsoft.Extensions.DependencyInjection;
54
using System;
6-
using System.Collections.Generic;
7-
using System.Text;
85
using Xunit;
96

107
namespace DotnetSpider.Broker.Test

src/DotnetSpider.Broker/ApiAuthorizeMiddleware.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using Microsoft.AspNetCore.Http;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
52
using System.Net;
63
using System.Threading.Tasks;
74

src/DotnetSpider.Broker/BrokerOptions.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
1+
using System.Collections.Generic;
52

63
namespace DotnetSpider.Broker
74
{

src/DotnetSpider.Broker/Controllers/BrokerController.cs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
using Microsoft.AspNetCore.Mvc;
22
using Microsoft.Extensions.Logging;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
6-
using System.Threading.Tasks;
73

84
namespace DotnetSpider.Broker.Controllers
95
{

src/DotnetSpider.Broker/Controllers/HomeController.cs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Diagnostics;
4-
using System.Linq;
5-
using System.Threading.Tasks;
1+
using System.Diagnostics;
62
using Microsoft.AspNetCore.Mvc;
73
using DotnetSpider.Broker.Models;
84

src/DotnetSpider.Broker/Data/Block.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Threading.Tasks;
52

63
namespace DotnetSpider.Broker.Data
74
{

src/DotnetSpider.Broker/Data/BrokerDbContext.cs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Text;
4-
using Microsoft.AspNetCore.Identity.EntityFrameworkCore;
1+
using Microsoft.AspNetCore.Identity.EntityFrameworkCore;
52
using Microsoft.EntityFrameworkCore;
63

74
namespace DotnetSpider.Broker.Data

0 commit comments

Comments
 (0)