
Commit e835044

Merge pull request #97 from JuliaText/as/towards07
Prepare for 1.0
2 parents 58057e9 + dbf5bed commit e835044

21 files changed: +143, -136 lines

.travis.yml

Lines changed: 2 additions & 4 deletions
@@ -2,11 +2,9 @@ language: julia
 os:
   - linux
 julia:
-  - 0.6
+  - 0.7
+  - 1.0
 notifications:
   email: false
-script:
-  - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi
-  - julia -e 'Pkg.clone(pwd()); Pkg.build("TextAnalysis"); Pkg.test("TextAnalysis"; coverage=true)';
 after_success:
   - julia -e 'cd(Pkg.dir("TextAnalysis")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())';
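The inline Pkg.clone/build/test one-liner is dropped, presumably leaving the build and test steps to Travis's default Julia script on 0.7/1.0, where the old top-level Pkg calls no longer exist. As a rough sketch only (not part of this diff), the equivalent commands under the Julia 1.0 Pkg API would look like:

    # Hypothetical Julia 1.0 equivalent of the removed one-liner;
    # Pkg is a standard library now and must be loaded explicitly.
    using Pkg
    Pkg.build("TextAnalysis")
    Pkg.test("TextAnalysis"; coverage=true)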

REQUIRE

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
-julia 0.6
+julia 0.7
 BinaryProvider
-Languages 0.2.0
+Languages 0.4.0
 DataFrames
 WordTokenizers
 Flux

appveyor.yml

Lines changed: 24 additions & 21 deletions
@@ -1,9 +1,18 @@
 environment:
   matrix:
-  - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
-  - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
-  # - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe"
-  # - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe"
+  - julia_version: 0.7
+  - julia_version: 1
+  - julia_version: nightly
+
+platform:
+  - x86 # 32-bit
+  - x64 # 64-bit
+
+# # Uncomment the following lines to allow failures on nightly julia
+# # (tests will run but not make your overall status red)
+# matrix:
+allow_failures:
+  - julia_version: nightly
 
 branches:
   only:
@@ -17,24 +26,18 @@ notifications:
   on_build_status_changed: false
 
 install:
-  - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12"
-  # If there's a newer build queued for the same PR, cancel this one
-  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
-        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
-        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
-        throw "There are newer queued builds for this pull request, failing early." }
-  # Download most recent Julia Windows binary
-  - ps: (new-object net.webclient).DownloadFile(
-        $env:JULIA_URL,
-        "C:\projects\julia-binary.exe")
-  # Run installer silently, output to C:\projects\julia
-  - C:\projects\julia-binary.exe /S /D=C:\projects\julia
+  - ps: iex ((new-object net.webclient).DownloadString("https://raw.githubusercontent.com/JuliaCI/Appveyor.jl/version-1/bin/install.ps1"))
 
 build_script:
-  # Need to convert from shallow to complete for Pkg.clone to work
-  - IF EXIST .git\shallow (git fetch --unshallow)
-  - C:\projects\julia\bin\julia -e "versioninfo();
-      Pkg.clone(pwd(), \"TextAnalysis\"); Pkg.build(\"TextAnalysis\")"
+  - echo "%JL_BUILD_SCRIPT%"
+  - C:\julia\bin\julia -e "%JL_BUILD_SCRIPT%"
 
 test_script:
-  - C:\projects\julia\bin\julia -e "Pkg.test(\"TextAnalysis\")"
+  - echo "%JL_TEST_SCRIPT%"
+  - C:\julia\bin\julia -e "%JL_TEST_SCRIPT%"
+
+# # Uncomment to support code coverage upload. Should only be enabled for packages
+# # which would have coverage gaps without running on Windows
+# on_success:
+#  - echo "%JL_CODECOV_SCRIPT%"
+#  - C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%"

docs/push-gh-pages.jl

Lines changed: 5 additions & 5 deletions
@@ -4,9 +4,9 @@
 
 last_commit=readchomp(`git --no-pager log -1 --pretty=format:"%h:%s"`)
 
-ENV["GIT_DIR"]=abspath(chomp(readstring(`git rev-parse --git-dir`)))
+ENV["GIT_DIR"]=abspath(chomp(read(`git rev-parse --git-dir`, String)))
 
-old_sha = chomp(readstring(`git rev-parse refs/remotes/origin/gh-pages`))
+old_sha = chomp(read(`git rev-parse refs/remotes/origin/gh-pages`, String))
 
 #run(`julia make.jl`)
 
@@ -16,13 +16,13 @@ cd("build") do
     ENV["GIT_INDEX_FILE"]=gif
     ENV["GIT_WORK_TREE"]=pwd()
    run(`git add -A`)
-    tsha=chomp(readstring(`git write-tree`))
+    tsha=chomp(read(`git write-tree`, String))
     mesg="Deploy docs for master@$last_commit"
 
     if length(old_sha) == 40
-        csha = chomp(readstring(`git commit-tree $tsha -p $old_sha -m $(mesg)`))
+        csha = chomp(read(`git commit-tree $tsha -p $old_sha -m $(mesg)`, String))
     else
-        csha = chomp(readstring(`git commit-tree $tsha -m $(mesg)`))
+        csha = chomp(read(`git commit-tree $tsha -m $(mesg)`, String))
     end
 
 print("Created commit $csha")

src/TextAnalysis.jl

Lines changed: 4 additions & 0 deletions
@@ -1,6 +1,10 @@
 using DataFrames
 
 module TextAnalysis
+    using SparseArrays
+    using Printf
+    using LinearAlgebra
+
     using Languages
     using DataFrames
     using WordTokenizers
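The three new imports are needed because SparseArrays, Printf, and LinearAlgebra were split out into standard-library modules in Julia 0.7 and are no longer available by default. A small standalone illustration (toy values, not from the package):

    using SparseArrays, Printf, LinearAlgebra

    A = sparse([1, 2], [1, 2], [1.0, 2.0])       # needs SparseArrays
    @printf("Frobenius norm: %.2f\n", norm(A))   # needs Printf and LinearAlgebra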

src/corpus.jl

Lines changed: 10 additions & 9 deletions
@@ -45,7 +45,7 @@ function DirectoryCorpus(dirname::AbstractString)
 
     cd(dirname)
     for filename in readdir(".")
-        if isfile(filename) && !ismatch(r"^\.", filename)
+        if isfile(filename) && !occursin(r"^\.", filename)
            push!(docs, FileDocument(abspath(filename)))
        end
        if isdir(filename) && !islink(filename)
@@ -102,9 +102,10 @@ end
 #
 ##############################################################################
 
-Base.start(crps::Corpus) = 1
-Base.next(crps::Corpus, ind::Int) = (crps.documents[ind], ind + 1)
-Base.done(crps::Corpus, ind::Int) = ind > length(crps.documents)
+function Base.iterate(crps::Corpus, ind=1)
+    ind > length(crps.documents) && return nothing
+    crps.documents[ind], ind+1
+end
 
 ##############################################################################
 #
@@ -115,8 +116,8 @@ Base.done(crps::Corpus, ind::Int) = ind > length(crps.documents)
 Base.push!(crps::Corpus, d::AbstractDocument) = push!(crps.documents, d)
 Base.pop!(crps::Corpus) = pop!(crps.documents)
 
-Base.unshift!(crps::Corpus, d::AbstractDocument) = unshift!(crps.documents, d)
-Base.shift!(crps::Corpus) = shift!(crps.documents)
+Base.pushfirst!(crps::Corpus, d::AbstractDocument) = pushfirst!(crps.documents, d)
+Base.popfirst!(crps::Corpus) = popfirst!(crps.documents)
 
 function Base.insert!(crps::Corpus, index::Int, d::AbstractDocument)
     insert!(crps.documents, index, d)
@@ -133,8 +134,8 @@ Base.delete!(crps::Corpus, index::Integer) = delete!(crps.documents, index)
 ##############################################################################
 
 Base.getindex(crps::Corpus, ind::Real) = crps.documents[ind]
-Base.getindex{T <: Real}(crps::Corpus, inds::Vector{T}) = crps.documents[inds]
-Base.getindex(crps::Corpus, r::Range) = crps.documents[r]
+Base.getindex(crps::Corpus, inds::Vector{T}) where {T <: Real} = crps.documents[inds]
+Base.getindex(crps::Corpus, r::AbstractRange) = crps.documents[r]
 Base.getindex(crps::Corpus, term::AbstractString) = get(crps.inverse_index, term, Int[])
 
 ##############################################################################
@@ -226,7 +227,7 @@ hash_function!(crps::Corpus, f::TextHashFunction) = (crps.h = f; nothing)
 #
 ##############################################################################
 
-function standardize!{T <: AbstractDocument}(crps::Corpus, ::Type{T})
+function standardize!(crps::Corpus, ::Type{T}) where T <: AbstractDocument
     for i in 1:length(crps)
        crps.documents[i] = convert(T, crps.documents[i])
    end
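Julia 0.7 replaced the start/next/done iteration protocol with a single Base.iterate method, which is what the new Corpus definition above implements. A self-contained sketch of the same pattern on a toy wrapper type (DocList and its field are invented for illustration, not part of TextAnalysis):

    struct DocList
        items::Vector{String}
    end

    function Base.iterate(d::DocList, ind=1)
        ind > length(d.items) && return nothing  # signal end of iteration
        d.items[ind], ind + 1                    # (element, next state)
    end

    for doc in DocList(["one", "two", "three"])
        println(doc)
    end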

src/deprecations.jl

Lines changed: 4 additions & 4 deletions
@@ -1,22 +1,22 @@
 
 ## Deprecations for Languages
 
-function tokenize{S <: Language, T <: AbstractString}(::Type{S}, s::T)
+function tokenize(::Type{S}, s::T) where {S <: Language, T <: AbstractString}
     depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
     tokenize(S(), s)
 end
 
-function ngramize{S <: Language, T <: AbstractString}(::Type{S}, words::Vector{T}, n::Int)
+function ngramize(::Type{S}, words::Vector{T}, n::Int) where {S <: Language, T <: AbstractString}
     depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
     ngramize(S(), words, n)
 end
 
-function onegramize{S <: Language, T <: AbstractString}(::Type{S}, words::Vector{T})
+function onegramize(::Type{S}, words::Vector{T}) where {S <: Language, T <: AbstractString}
     depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
     onegramize(S(), words)
 end
 
-function stem_all{S <: Language}(stemmer::Stemmer, lang::Type{S}, sentence::AbstractString)
+function stem_all(stemmer::Stemmer, lang::Type{S}, sentence::AbstractString) where S <: Language
     depwarn("Use of Languages as types is deprecated. Use instances.", Symbol(S))
     stem_all(stemmer, S(), sentence)
 end
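All four edits above are the same mechanical rewrite: the curly-brace parametric method syntax f{T}(...) was removed in Julia 1.0 in favour of a trailing where clause. A toy example of the new form (the scale function and its arguments are invented, not from the package):

    # Old (0.6):  scale{T <: Number}(xs::Vector{T}, c) = [c * x for x in xs]
    scale(xs::Vector{T}, c) where {T <: Number} = [c * x for x in xs]

    scale([1, 2, 3], 10)   # => [10, 20, 30]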

src/document.jl

Lines changed: 12 additions & 12 deletions
@@ -4,7 +4,7 @@
 #
 ##############################################################################
 
-type DocumentMetadata
+mutable struct DocumentMetadata
     language
     name::String
     author::String
@@ -31,7 +31,7 @@ abstract type AbstractDocument; end
 #
 ##############################################################################
 
-type FileDocument <: AbstractDocument
+mutable struct FileDocument <: AbstractDocument
     filename::String
     metadata::DocumentMetadata
 end
@@ -48,7 +48,7 @@ end
 #
 ##############################################################################
 
-type StringDocument{T<:AbstractString} <: AbstractDocument
+mutable struct StringDocument{T<:AbstractString} <: AbstractDocument
     text::T
     metadata::DocumentMetadata
 end
@@ -61,14 +61,14 @@ StringDocument(txt::AbstractString) = StringDocument(txt, DocumentMetadata())
 #
 ##############################################################################
 
-type TokenDocument{T<:AbstractString} <: AbstractDocument
+mutable struct TokenDocument{T<:AbstractString} <: AbstractDocument
     tokens::Vector{T}
     metadata::DocumentMetadata
 end
 function TokenDocument(txt::AbstractString, dm::DocumentMetadata)
     TokenDocument(tokenize(dm.language, String(txt)), dm)
 end
-function TokenDocument{T <: AbstractString}(tkns::Vector{T})
+function TokenDocument(tkns::Vector{T}) where T <: AbstractString
     TokenDocument(tkns, DocumentMetadata())
 end
 TokenDocument(txt::AbstractString) = TokenDocument(String(txt), DocumentMetadata())
@@ -79,7 +79,7 @@ TokenDocument(txt::AbstractString) = TokenDocument(String(txt), DocumentMetadata
 #
 ##############################################################################
 
-type NGramDocument{T<:AbstractString} <: AbstractDocument
+mutable struct NGramDocument{T<:AbstractString} <: AbstractDocument
     ngrams::Dict{T,Int}
     n::Int
     metadata::DocumentMetadata
@@ -91,7 +91,7 @@ end
 function NGramDocument(txt::AbstractString, n::Integer=1)
     NGramDocument(txt, DocumentMetadata(), n)
 end
-function NGramDocument{T <: AbstractString}(ng::Dict{T, Int}, n::Integer=1)
+function NGramDocument(ng::Dict{T, Int}, n::Integer=1) where T <: AbstractString
     NGramDocument(merge(Dict{AbstractString,Int}(), ng), n, DocumentMetadata())
 end
 
@@ -103,12 +103,12 @@ end
 
 function text(fd::FileDocument)
     !isfile(fd.filename) && error("Can't find file: $(fd.filename)")
-    readstring(fd.filename)
+    read(fd.filename, String)
 end
 
 text(sd::StringDocument) = sd.text
 function text(td::TokenDocument)
-    warn("TokenDocument's can only approximate the original text")
+    @warn("TokenDocument's can only approximate the original text")
     join(td.tokens, " ")
 end
 function text(ngd::NGramDocument)
@@ -132,8 +132,8 @@ function tokens(d::NGramDocument)
     error("The tokens of an NGramDocument cannot be reconstructed")
 end
 
-tokens!{T <: AbstractString}(d::TokenDocument, new_tokens::Vector{T}) = (d.tokens = new_tokens)
-function tokens!{T <: AbstractString}(d::AbstractDocument, new_tokens::Vector{T})
+tokens!(d::TokenDocument, new_tokens::Vector{T}) where {T <: AbstractString} = (d.tokens = new_tokens)
+function tokens!(d::AbstractDocument, new_tokens::Vector{T}) where T <: AbstractString
     error("The tokens of a $(typeof(d)) cannot be directly edited")
 end
 
@@ -199,7 +199,7 @@ const GenericDocument = Union{
 ##############################################################################
 
 Document(str::AbstractString) = isfile(str) ? FileDocument(str) : StringDocument(str)
-Document{T <: AbstractString}(tkns::Vector{T}) = TokenDocument(tkns)
+Document(tkns::Vector{T}) where {T <: AbstractString} = TokenDocument(tkns)
 Document(ng::Dict{String, Int}) = NGramDocument(ng)
 
 ##############################################################################
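The repeated type -> mutable struct edits reflect the keyword rename in Julia 0.7: type became mutable struct (and immutable became struct). A toy definition showing the new keyword (the Counter type is invented for illustration):

    mutable struct Counter
        n::Int
    end

    c = Counter(0)
    c.n += 1   # fields of a mutable struct can be reassigned after construction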

src/dtm.jl

Lines changed: 10 additions & 10 deletions
@@ -4,7 +4,7 @@
 #
 ##############################################################################
 
-type DocumentTermMatrix
+mutable struct DocumentTermMatrix
     dtm::SparseMatrixCSC{Int, Int}
     terms::Vector{String}
     column_indices::Dict{String, Int}
@@ -32,9 +32,9 @@ function DocumentTermMatrix(crps::Corpus, terms::Vector{String})
     m = length(crps)
     n = length(terms)
 
-    rows = Array{Int}(0)
-    columns = Array{Int}(0)
-    values = Array{Int}(0)
+    rows = Array{Int}(undef, 0)
+    columns = Array{Int}(undef, 0)
+    values = Array{Int}(undef, 0)
     for i in 1:m
         doc = crps.documents[i]
         ngs = ngrams(doc)
@@ -57,7 +57,7 @@ function DocumentTermMatrix(crps::Corpus, terms::Vector{String})
 end
 DocumentTermMatrix(crps::Corpus) = DocumentTermMatrix(crps, lexicon(crps))
 
-DocumentTermMatrix(crps::Corpus, lex::Associative) = DocumentTermMatrix(crps, sort(collect(keys(lex))))
+DocumentTermMatrix(crps::Corpus, lex::AbstractDict) = DocumentTermMatrix(crps, sort(collect(keys(lex))))
 
 DocumentTermMatrix(dtm::SparseMatrixCSC{Int, Int},terms::Vector{String}) = DocumentTermMatrix(dtm, terms, columnindices(terms))
 
@@ -71,7 +71,7 @@ function dtm(d::DocumentTermMatrix, density::Symbol)
     if density == :sparse
         return d.dtm
     else
-        return full(d.dtm)
+        return Matrix(d.dtm)
     end
 end
 
@@ -99,8 +99,8 @@ tdm(crps::Corpus) = dtm(crps)' #'
 
 function dtm_entries(d::AbstractDocument, lex::Dict{String, Int})
     ngs = ngrams(d)
-    indices = Array{Int}(0)
-    values = Array{Int}(0)
+    indices = Array{Int}(undef, 0)
+    values = Array{Int}(undef, 0)
     terms = sort(collect(keys(lex)))
     column_indices = columnindices(terms)
 
@@ -166,7 +166,7 @@ hash_tdm(crps::Corpus) = hash_dtm(crps)' #'
 #
 ##############################################################################
 
-type EachDTV
+mutable struct EachDTV
     crps::Corpus
 end
 
@@ -178,7 +178,7 @@ end
 
 done(edt::EachDTV, state::Int) = state > length(edt.crps.documents)
 
-type EachHashDTV
+mutable struct EachHashDTV
     crps::Corpus
 end
 
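Two of the patterns above in isolation: uninitialized-array construction now takes an explicit undef argument, and full on a sparse matrix is replaced by the Matrix constructor. A minimal standalone example (values chosen arbitrarily, not taken from the package):

    using SparseArrays

    rows = Array{Int}(undef, 0)   # empty Int vector; formerly Array{Int}(0)
    push!(rows, 1)

    S = sparse([1, 2], [1, 2], [3, 4])
    dense = Matrix(S)             # formerly full(S)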

src/hash.jl

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 #
 ##############################################################################
 
-type TextHashFunction
+mutable struct TextHashFunction
     hash_function::Function
     cardinality::Int
 end
