diff --git a/.editorconfig b/.editorconfig deleted file mode 100644 index d4a2c4405..000000000 --- a/.editorconfig +++ /dev/null @@ -1,21 +0,0 @@ -# http://editorconfig.org - -root = true - -[*] -indent_style = space -indent_size = 4 -trim_trailing_whitespace = true -insert_final_newline = true -charset = utf-8 -end_of_line = lf - -[*.bat] -indent_style = tab -end_of_line = crlf - -[LICENSE] -insert_final_newline = false - -[Makefile] -indent_style = tab diff --git "a/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md" "b/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md" deleted file mode 100644 index 2357c6ccb..000000000 --- "a/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md" +++ /dev/null @@ -1,7 +0,0 @@ -# วิธีติดตั้ง PyThaiNLP เวชั่นล่าสุดจาก GitHub - -ใช้คำสั่งนี้ในคอมมาไลน์ - -``` -pip install -U https://github.com/wannaphongcom/pythainlp/archive/pythainlp1.4.zip -``` \ No newline at end of file diff --git a/examples/collation.py b/examples/collation.py index f78b7ce77..80e66f0d6 100644 --- a/examples/collation.py +++ b/examples/collation.py @@ -1,3 +1,5 @@ # -*- coding: utf-8 -*- + from pythainlp.collation import collation -print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา'] \ No newline at end of file + +print(collation(["ไก่", "ไข่", "ก", "ฮา"])) # ['ก', 'ไก่', 'ไข่', 'ฮา'] diff --git a/examples/etcc.py b/examples/etcc.py index 6e56f1979..f732fdf11 100644 --- a/examples/etcc.py +++ b/examples/etcc.py @@ -1,3 +1,5 @@ # -*- coding: utf-8 -*- + from pythainlp.tokenize import etcc -print(etcc.etcc('คืนความสุข')) # /คืน/ความสุข \ No newline at end of file + +print(etcc.etcc("คืนความสุข")) # /คืน/ความสุข diff --git a/examples/normalize.py b/examples/normalize.py index c83d6ddb9..cac000306 100644 --- a/examples/normalize.py +++ b/examples/normalize.py @@ -1,3 +1,5 @@ # -*- coding: utf-8 -*- + from pythainlp.util import normalize -print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก \ No newline at end of file + +print(normalize("เเปลก") == "แปลก") # เ เ ป ล ก กับ แปลก diff --git a/examples/romanization.py b/examples/romanization.py index 78c59f572..38ac4840a 100644 --- a/examples/romanization.py +++ b/examples/romanization.py @@ -1,3 +1,5 @@ # -*- coding: utf-8 -*- + from pythainlp.romanization import romanization -print(romanization("แมว")) \ No newline at end of file + +print(romanization("แมว")) diff --git a/examples/segment.py b/examples/segment.py deleted file mode 100644 index 665b26960..000000000 --- a/examples/segment.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- -from pythainlp.tokenize import word_tokenize -a =u'ฉันรักภาษาไทยเพราะฉันเป็นคนไทยและฉันใช้ภาษาไทย' -b = 
word_tokenize(a) -print(b) diff --git a/examples/soundex.py b/examples/soundex.py index 98b2d7677..3b8e35c2e 100644 --- a/examples/soundex.py +++ b/examples/soundex.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- -from pythainlp.soundex import LK82,Udom83 -print(LK82('รถ')==LK82('รด')) -print(Udom83('วรร')==Udom83('วัน')) \ No newline at end of file + +from pythainlp.soundex import LK82, Udom83 + +print(LK82("รถ") == LK82("รด")) + +print(Udom83("วรร") == Udom83("วัน")) diff --git a/examples/spell.py b/examples/spell.py index 221c8089e..0d39ff07f 100644 --- a/examples/spell.py +++ b/examples/spell.py @@ -1,4 +1,8 @@ # -*- coding: utf-8 -*- -from pythainlp.spell import * -a=spell("สี่เหลียม") -print(a) # ['สี่เหลี่ยม'] \ No newline at end of file + +from pythainlp.spell import spell + +a = spell("สี่เหลียม") +print(a) # ['สี่เหลี่ยม'] + +# a = spell("สี่เหลียม", engine="hunspell") # available in some Linux systems diff --git a/examples/tcc.py b/examples/tcc.py index b999880f5..4d95aed43 100644 --- a/examples/tcc.py +++ b/examples/tcc.py @@ -1,3 +1,10 @@ # -*- coding: utf-8 -*- + from pythainlp.tokenize import tcc -print(tcc.tcc('ประเทศไทย')) # ป/ระ/เท/ศ/ไท/ย \ No newline at end of file + +print(tcc.tcc("ประเทศไทย")) # ป/ระ/เท/ศ/ไท/ย + +print(tcc.tcc_pos("ประเทศไทย")) # {1, 3, 5, 6, 8, 9} + +for ch in tcc.tcc_gen("ประเทศไทย"): # ป-ระ-เท-ศ-ไท-ย- + print(ch, end='-') diff --git a/examples/tokenize.py b/examples/tokenize.py new file mode 100644 index 000000000..c6b6028e5 --- /dev/null +++ b/examples/tokenize.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- + +from pythainlp.tokenize import sent_tokenize, word_tokenize + +text = "ฉันรักภาษาไทย เพราะฉันใช้ภาษาไทย " +print(text) + +print(sent_tokenize(text)) +# ['ฉันรักภาษาไทย', 'เพราะฉันใช้ภาษาไทย', ''] + +print(word_tokenize(text)) +# ['ฉัน', 'รัก', 'ภาษาไทย', ' ', 'เพราะ', 'ฉัน', 'ใช้', 'ภาษาไทย', ' '] + +print(word_tokenize(text, whitespaces=False)) +# ['ฉัน', 'รัก', 'ภาษาไทย', 'เพราะ', 'ฉัน', 'ใช้', 'ภาษาไทย'] + +text2 = "กฎหมายแรงงาน" +print(text2) + +print(word_tokenize(text2)) +# ['กฎหมายแรงงาน'] + +print(word_tokenize(text2, engine="longest-matching")) +# ['กฎหมาย', 'แรงงาน'] diff --git a/examples/whitespaceTokenizer.py b/examples/whitespaceTokenizer.py deleted file mode 100644 index 89e975282..000000000 --- a/examples/whitespaceTokenizer.py +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- -from pythainlp.tokenize import WhitespaceTokenizer -print(WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")) \ No newline at end of file diff --git a/pythainlp/corpus/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/corpus/.ipynb_checkpoints/__init__-checkpoint.py deleted file mode 100644 index b8361f789..000000000 --- a/pythainlp/corpus/.ipynb_checkpoints/__init__-checkpoint.py +++ /dev/null @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import,unicode_literals -from pythainlp.tools import get_path_db,get_path_data -from tinydb import TinyDB,Query -from future.moves.urllib.request import urlopen -from tqdm import tqdm -import requests -import os -import math -import requests -from nltk.corpus import names -#__all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"] -path_db_=get_path_db() -def get_file(name): - db=TinyDB(path_db_) - temp = Query() - if len(db.search(temp.name==name))>0: - path= get_path_data(db.search(temp.name==name)[0]['file']) - db.close() - if not os.path.exists(path): - download(name) - return path -def download_(url, dst): - """ - @param: url to download file - @param: dst place to put the file 
- """ - file_size = int(urlopen(url).info().get('Content-Length', -1)) - if os.path.exists(dst): - first_byte = os.path.getsize(dst) - else: - first_byte = 0 - if first_byte >= file_size: - return file_size - header = {"Range": "bytes=%s-%s" % (first_byte, file_size)} - pbar = tqdm( - total=file_size, initial=first_byte, - unit='B', unit_scale=True, desc=url.split('/')[-1]) - req = requests.get(url, headers=header, stream=True) - with(open(get_path_data(dst), 'wb')) as f: - for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - pbar.update(1024) - pbar.close() - #return file_size -def download(name,force=False): - db=TinyDB(path_db_) - temp = Query() - data=requests.get("https://raw.githubusercontent.com/PyThaiNLP/pythainlp-corpus/master/db.json") - data_json=data.json() - if name in list(data_json.keys()): - temp_name=data_json[name] - print("Download : "+name) - if len(db.search(temp.name==name))==0: - print(name+" "+temp_name['version']) - download_(temp_name['download'],temp_name['file_name']) - db.insert({'name': name, 'version': temp_name['version'],'file':temp_name['file_name']}) - else: - if len(db.search(temp.name==name and temp.version==temp_name['version']))==0: - print("have update") - print("from "+name+" "+db.search(temp.name==name)[0]['version']+" update to "+name+" "+temp_name['version']) - yes_no="y" - if force==False: - yes_no=str(input("y or n : ")).lower() - if "y"==yes_no: - download_(temp_name['download'],temp_name['file_name']) - db.update({'version':temp_name['version']},temp.name==name) - else: - print("re-download") - print("from "+name+" "+db.search(temp.name==name)[0]['version']+" update to "+name+" "+temp_name['version']) - yes_no="y" - if force==False: - yes_no=str(input("y or n : ")).lower() - if "y"==yes_no: - download_(temp_name['download'],temp_name['file_name']) - db.update({'version':temp_name['version']},temp.name==name) - db.close() \ No newline at end of file diff --git a/pythainlp/sentiment/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/sentiment/.ipynb_checkpoints/__init__-checkpoint.py deleted file mode 100644 index 77275d0db..000000000 --- a/pythainlp/sentiment/.ipynb_checkpoints/__init__-checkpoint.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import,unicode_literals,print_function -import six -import sys -if six.PY2: - print("Thai sentiment in pythainlp. Not support python 2.7") - sys.exit(0) -import pythainlp -from pythainlp.corpus import stopwords -import os -from pythainlp.tokenize import word_tokenize -try: - import dill -except ImportError: - import pip - pip.main(['install','dill']) - try: - import dill - except ImportError: - print("Error ! 
using 'pip install dill'") - sys.exit(0) - -templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'sentiment') -def sentiment(text): - """ - sentiment ภาษาไทย - ใช้ข้อมูลจาก https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/ - รับค่าสตริง str คืนค่า pos , neg""" - with open(os.path.join(templates_dir, 'vocabulary.data'), 'rb') as in_strm: - vocabulary = dill.load(in_strm) - in_strm.close() - with open(os.path.join(templates_dir, 'sentiment.data'), 'rb') as in_strm: - classifier = dill.load(in_strm) - in_strm.close() - text=set(word_tokenize(text))-set(stopwords.words('thai')) - featurized_test_sentence = {i:(i in text) for i in vocabulary} - return classifier.classify(featurized_test_sentence) -if __name__ == '__main__': - d="เสียใจแย่มากเลย" - print(sentiment(d)) \ No newline at end of file diff --git a/pythainlp/sentiment/.ipynb_checkpoints/build_pythainlp-checkpoint.tool b/pythainlp/sentiment/.ipynb_checkpoints/build_pythainlp-checkpoint.tool deleted file mode 100644 index b7a7f7107..000000000 --- a/pythainlp/sentiment/.ipynb_checkpoints/build_pythainlp-checkpoint.tool +++ /dev/null @@ -1,48 +0,0 @@ -# -*- coding: utf-8 -*- -# เครื่องมือในการ build sentiment เพื่อใช้ใน pythainlp -# เวชั่น 0.3 -# 2018/01/18 -# เขียนโดย นาย วรรณพงษ์ ภัททิยไพบูลย์ -# cc-by 3.0 Thai Sentiment Text https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/ -# อ่านบทความได้ที่ https://python3.wannaphong.com/2017/02/ทำ-sentiment-analysis-ภาษาไทยใน-python.html -from nltk import NaiveBayesClassifier as nbc -import dill -from pythainlp.tokenize import word_tokenize -from pythainlp.corpus import stopwords -import codecs -from itertools import chain -thaistopwords = stopwords.words('thai') -# pos.txt -with codecs.open('pos.txt', 'r', "utf-8") as f: - lines = f.readlines() -listpos=[x for x in [e.strip() for e in lines] if x not in thaistopwords] -del lines -f.close() # ปิดไฟล์ -# neg.txt -with codecs.open('neg.txt', 'r', "utf-8") as f: - lines = f.readlines() -listneg=[x for x in [e.strip() for e in lines] if x not in thaistopwords] -f.close() # ปิดไฟล์ -print(1) -pos1=['pos']*len(listpos) -neg1=['neg']*len(listneg) -print(2) -training_data = list(zip(listpos,pos1)) + list(zip(listneg,neg1)) -print(3) -#vocabulary = set(chain(*[(set(word_tokenize(i[0]))-set(stopwords.words('thai'))) for i in training_data])) -#vocabulary = set(chain(*[x for x in a if x not in [list(set(word_tokenize(i[0]))) for i in training_data]])) -vocabulary = set(chain(*[word_tokenize(i[0]) for i in training_data])) -#print(vocabulary) -print(3.1) -feature_set = [({i:(i in word_tokenize(sentence)) for i in vocabulary},tag) for sentence, tag in training_data] -#print(feature_set) -print(4) -classifier = nbc.train(feature_set) -print(5) -with open('vocabulary.data', 'wb') as out_strm: - dill.dump(vocabulary,out_strm) -out_strm.close() -with open('sentiment.data', 'wb') as out_strm: - dill.dump(classifier,out_strm) -out_strm.close() -print('OK') \ No newline at end of file diff --git a/pythainlp/sentiment/.ipynb_checkpoints/ulmfit_sent-checkpoint.py b/pythainlp/sentiment/.ipynb_checkpoints/ulmfit_sent-checkpoint.py deleted file mode 100644 index aacd9e844..000000000 --- a/pythainlp/sentiment/.ipynb_checkpoints/ulmfit_sent-checkpoint.py +++ /dev/null @@ -1,89 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Code by https://github.com/cstorm125/thai2vec/tree/master/notebook -''' -from __future__ import absolute_import,unicode_literals -import os -import sys -from 
collections import defaultdict - -#numpy and dill -try: - import numpy as np - import dill as pickle -except ImportError: - import pip - pip.main(['install','numpy','dill']) - try: - import numpy as np - import dill as pickle - except ImportError: - print("Error installing using 'pip install numpy dill'") - sys.exit(0) - -#import torch -try: - import torch -except ImportError: - print('PyTorch required. See https://pytorch.org/.') -import torch -from torch.autograd import Variable -from torch import LongTensor - -#import fastai for multiBatchRNN -try: - from fastai.text import * -except ImportError: - print( - """ - fastai required for multiBatchRNN. - Run 'pip install https://github.com/fastai/fastai/archive/master.zip' - """) - -from pythainlp.tokenize import word_tokenize -from pythainlp.corpus import get_file -from pythainlp.corpus import download - -MODEL_NAME = 'sent_model' -ITOS_NAME = 'itos_sent' - -#download pretrained model -def get_path(fname): - path = get_file(fname) - if path==None: - download(fname) - path = get_file(fname) - return(path) - -#load model -m = torch.load(get_path(MODEL_NAME)) -m.eval() -#load itos and stoi -itos = pickle.load(open(get_path(ITOS_NAME),'rb')) -stoi = defaultdict(lambda:0, {v:k for k,v in enumerate(itos)}) - - -#get sentiment; 1 for positive and 0 for negative -#or score if specified return_score=True -softmax = lambda x : np.exp(x)/np.sum(np.exp(x)) -def get_sentiment(ss,return_score=False): - s = word_tokenize(ss) - t = LongTensor([stoi[i] for i in s]).view(-1,1).cpu() - t = Variable(t,volatile=False) - m.reset() - pred,*_ = m(t) - result = pred.data.cpu().numpy().reshape(-1) - if return_score: - return(softmax(result)) - else: - return(np.argmax(result)) - -def about(): - return ''' - Sentiment Analyzer based on thai2vec - Data is from various online reviews including but not limited to JagerV3 and Wongnai Challenge. - 89% accuracy based on 15% validation set compared to 72% of fastText and 52% most-frequent-class baseline. 
- - Development : Charin Polpanumas - GitHub : https://github.com/cstorm125/thai2vec - ''' \ No newline at end of file diff --git a/pythainlp/tokenize/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/tokenize/.ipynb_checkpoints/__init__-checkpoint.py deleted file mode 100644 index c3d3f3731..000000000 --- a/pythainlp/tokenize/.ipynb_checkpoints/__init__-checkpoint.py +++ /dev/null @@ -1,227 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import,unicode_literals -import nltk -import re -import codecs -from six.moves import zip -from pythainlp.corpus.thaisyllable import get_data -from pythainlp.corpus.thaiword import get_data as get_dict -from marisa_trie import Trie - -DEFAULT_DICT_TRIE = Trie(get_dict()) - -def dict_word_tokenize(text, custom_dict_trie, engine='newmm'): - ''' - dict_word_tokenize(text,file,engine) - เป็นคำสั่งสำหรับตัดคำโดยใช้ข้อมูลที่ผู้ใช้กำหนด - text คือ ข้อความที่ต้องการตัดคำ - custom_dict_trie คือ trie ที่สร้างจาก create_custom_dict_trie - engine คือ เครื่องมือตัดคำ - - newmm ตัดคำด้วย newmm - - wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ - - mm ตัดคำด้วย mm - - longest-matching ตัดคำโดยใช้ longest matching - ''' - - if engine=="newmm": - from .newmm import mmcut as segment - elif engine=="mm": - from .mm import segment - elif engine=='longest-matching': - from .longest import segment - elif engine=='wordcutpy': - from .wordcutpy import segment - return segment(text, custom_dict_trie.keys()) - - return segment(text, custom_dict_trie) - -def word_tokenize(text, engine='newmm',whitespaces=True): - """ - ระบบตัดคำภาษาไทย - - word_tokenize(text,engine='newmm') - text คือ ข้อความในรูปแบบ str - engine มี - - newmm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ (ค่าเริ่มต้น) - - icu - engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ) - - dict - ใช้ dicu ในการตัดคำไทย จะคืนค่า False หากไม่สามารถตัดคำไทย - - longest-matching ใช้ Longest matching ในการตัดคำ - - mm ใช้ Maximum Matching algorithm - โค้ดชุดเก่า - - pylexto ใช้ LexTo ในการตัดคำ - - deepcut ใช้ Deep Neural Network ในการตัดคำภาษาไทย - - wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ - """ - - if engine=='icu': - ''' - ตัดคำภาษาไทยโดยใช้ icu ในการตัดคำ - คำเตือน !!! 
\n คำสั่ง word_tokenize(text) ใน PyThaiNLP 1.6 - ค่าเริ่มต้นจะเปลี่ยนจาก icu ไปเป็น newmm''' - from .pyicu import segment - elif engine=='dict': - ''' - ใช้ dicu ในการตัดคำไทย - จะคืนค่า False หากไม่สามารถตัดคำไทย - ''' - from .dictsegment import segment - elif engine=='mm': - ''' - ใช้ Maximum Matching algorithm - โค้ดชุดเก่า - ''' - from .mm import segment - elif engine=='newmm': - ''' - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ - ''' - from .newmm import mmcut as segment - elif engine=='longest-matching': - ''' - ใช้ Longest matching ในการตัดคำ - ''' - from .longest import segment - elif engine=='pylexto': - ''' - ใช้ LexTo ในการตัดคำ - ''' - from .pylexto import segment - elif engine=='deepcut': - ''' - ใช้ Deep Neural Network ในการตัดคำภาษาไทย - ''' - from .deepcut import segment - elif engine=='cutkum': - ''' - ใช้ Deep Neural Network ในการตัดคำภาษาไทย (https://github.com/pucktada/cutkum) - ''' - from .cutkum import segment - elif engine=='wordcutpy': - ''' - wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ - ''' - from .wordcutpy import segment - else: - raise Exception("error no have engine.") - if whitespaces==False: - return [i.strip(' ') for i in segment(text) if i.strip(' ')!=''] - return segment(text) - -def sent_tokenize(text,engine='whitespace+newline'): - ''' - sent_tokenize(text,engine='whitespace+newline') - ตัดประโยคเบื้องต้น โดยการแบ่งด้วยช่องว่าง - ''' - if engine=='whitespace': - data=nltk.tokenize.WhitespaceTokenizer().tokenize(text) - elif engine=='whitespace+newline': - data=re.sub(r'\n+|\s+','|',text,re.U).split('|') - return data -def wordpunct_tokenize(text): - ''' - wordpunct_tokenize(text) - It is nltk.tokenize.wordpunct_tokenize(text). - ''' - return nltk.tokenize.wordpunct_tokenize(text) -def WhitespaceTokenizer(text): - return nltk.tokenize.WhitespaceTokenizer().tokenize(text) -def isthai(text,check_all=False): - """ - สำหรับเช็คว่าเป็นตัวอักษรภาษาไทยหรือไม่ - isthai(text,check_all=False) - text คือ ข้อความหรือ list ตัวอักษร - check_all สำหรับส่งคืนค่า True หรือ False เช็คทุกตัวอักษร - - การส่งคืนค่า - {'thai':% อักษรภาษาไทย,'check_all':tuple โดยจะเป็น (ตัวอักษร,True หรือ False)} - """ - listext=list(text) - i=0 - num_isthai=0 - if check_all==True: - listthai=[] - while i= 3584 and cVal <= 3711): - num_isthai+=1 - if check_all==True: - listthai.append(True) - else: - if check_all==True: - listthai.append(False) - i+=1 - thai=(num_isthai/len(listext))*100 - if check_all==True: - dictthai=tuple(zip(listext,listthai)) - data= {'thai':thai,'check_all':dictthai} - else: - data= {'thai':thai} - return data -def syllable_tokenize(text1): - """ - syllable_tokenize(text) - เป็นคำสั่งสำหรับใช้ตัดพยางค์ในภาษาไทย - รับ str - ส่งออก list - """ - text1=word_tokenize(text1) - data=[] - trie = create_custom_dict_trie(custom_dict_source=get_data()) - if(len(text1)>0): - i=0 - while(i', re.IGNORECASE) - self.re_rep = re.compile(r'(\S)(\1{3,})') - - def sub_br(self,x): return self.re_br.sub("\n", x) - - def tokenize(self,x): - return [t for t in word_tokenize(self.sub_br(x),engine=self.engine)] - - #replace aaaaaaaa - @staticmethod - def replace_rep(m): - TK_REP = 'tkrep' - c,cc = m.groups() - return f'{TK_REP}{len(cc)+1}{c}' - - def proc_text(self, s): - s = self.re_rep.sub(ThaiTokenizer.replace_rep, s) - s = re.sub(r'([/#])', r' \1 ', s) - #remvoe double space - s = re.sub(' {2,}', ' ', s) - return self.tokenize(s) - - @staticmethod - def proc_all(ss): - tok = ThaiTokenizer() - return 
[tok.proc_text(s) for s in ss] - - @staticmethod - def proc_all_mp(ss): - ncpus = num_cpus()//2 - with ProcessPoolExecutor(ncpus) as e: - return sum(e.map(ThaiTokenizer.proc_all, ss), []) - -#get tokenized texts -BOS = 'xbos' # beginning-of-sentence tag -def get_texts(df): - labels = df.iloc[:,0].values.astype(np.int64) - texts = BOS+df.iloc[:,1].astype(str).apply(lambda x: x.rstrip()) - tok = ThaiTokenizer().proc_all_mp(partition_by_cores(texts)) - return tok, list(labels) - -#get all tokenized texts -def get_all(df): - tok, labels = [], [] - for i, r in enumerate(df): - tok_, labels_ = get_texts(r) - tok += tok_; - labels += labels_ - return tok, labels - -#convert text dataframe to numericalized dataframes -def numericalizer(df, itos=None, max_vocab = 60000, min_freq = 2, pad_tok = '_pad_', unk_tok = '_unk_'): - tok, labels = get_all(df) - freq = Counter(p for o in tok for p in o) - if itos is None: - itos = [o for o,c in freq.most_common(max_vocab) if c>min_freq] - itos.insert(0, pad_tok) - itos.insert(0, unk_tok) - stoi = collections.defaultdict(lambda:0, {v:k for k,v in enumerate(itos)}) - lm = np.array([[stoi[o] for o in p] for p in tok]) - return(lm,tok,labels,itos,stoi,freq) - -#get document vectors from language model -def document_vector(ss, m, stoi,tok_engine='newmm'): - s = word_tokenize(ss) - t = LongTensor([stoi[i] for i in s]).view(-1,1).cuda() - t = Variable(t,volatile=False) - m.reset() - pred,*_ = m[0](t) - #get average of last lstm layer along bptt - res = to_np(torch.mean(pred[-1],0).view(-1)) - return(res) - -#merge pretrained embeddings with current embeddings -def merge_wgts(em_sz, wgts, itos_pre, itos_cls): - vocab_size = len(itos_cls) - enc_wgts = to_np(wgts['0.encoder.weight']) - #average weight of encoding - row_m = enc_wgts.mean(0) - stoi_pre = collections.defaultdict(lambda:-1, {v:k for k,v in enumerate(itos_pre)}) - #new embedding based on classification dataset - new_w = np.zeros((vocab_size, em_sz), dtype=np.float32) - for i,w in enumerate(itos_cls): - r = stoi_pre[w] - #use pretrianed embedding if present; else use the average - new_w[i] = enc_wgts[r] if r>=0 else row_m - wgts['0.encoder.weight'] = T(new_w) - wgts['0.encoder_with_dropout.embed.weight'] = T(np.copy(new_w)) - wgts['1.decoder.weight'] = T(np.copy(new_w)) - return(wgts) - -#feature extractor -class SaveFeatures(): - features=None - def __init__(self, m): self.hook = m.register_forward_hook(self.hook_fn) - def hook_fn(self, module, input, output): self.features = output - def remove(self): self.hook.remove() - -#download pretrained language model -def get_path(fname): - path = get_file(fname) - if path==None: - download(fname) - path = get_file(fname) - return(path) - -def load_pretrained_model(): - path = get_path(MODEL_NAME) - wgts = torch.load(path, map_location=lambda storage, loc: storage) - return(wgts) - -def load_pretrained_itos(): - path = get_path(ITOS_NAME) - itos = pickle.load(open(path,'rb')) - return(itos) - -def about(): - return ''' - thai2vec - State-of-the-Art Language Modeling, Text Feature Extraction and Text Classification in Thai Language. 
- Created as part of pyThaiNLP with ULMFit implementation from fast.ai - - Development : Charin Polpanumas - GitHub : https://github.com/cstorm125/thai2vec - ''' - - diff --git a/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint b/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint deleted file mode 100644 index 22e30a360..000000000 --- a/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint +++ /dev/null @@ -1,3 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import,unicode_literals -from .thai2vec import * \ No newline at end of file diff --git a/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint.py b/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint.py deleted file mode 100644 index d61c3dfd8..000000000 --- a/pythainlp/word_vector/.ipynb_checkpoints/__init__-checkpoint.py +++ /dev/null @@ -1,2 +0,0 @@ -# -*- coding: utf-8 -*- -from __future__ import absolute_import,unicode_literals \ No newline at end of file diff --git a/pythainlp/word_vector/.ipynb_checkpoints/thai2vec-checkpoint.py b/pythainlp/word_vector/.ipynb_checkpoints/thai2vec-checkpoint.py deleted file mode 100644 index 264228d70..000000000 --- a/pythainlp/word_vector/.ipynb_checkpoints/thai2vec-checkpoint.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- -''' -Code by https://github.com/cstorm125/thai2vec/blob/master/notebooks/examples.ipynb -''' -from __future__ import absolute_import,unicode_literals -import six -import sys -if six.PY2: - print("Thai sentiment in pythainlp. Not support python 2.7") - sys.exit(0) -try: - from gensim.models import KeyedVectors - import numpy as np -except ImportError: - import pip - pip.main(['install','gensim','numpy']) - try: - from gensim.models import KeyedVectors - import numpy as np - except ImportError: - print("Error ! using 'pip install gensim numpy'") - sys.exit(0) -from pythainlp.tokenize import word_tokenize -from pythainlp.corpus import get_file -from pythainlp.corpus import download as download_data -import os - -def download(): - path = get_file('thai2vec02') - if path==None: - download_data('thai2vec02') - path = get_file('thai2vec02') - return path -def get_model(): - return KeyedVectors.load_word2vec_format(download(),binary=False) -def most_similar_cosmul(positive,negative): - ''' - การใช้งาน - input list - ''' - return get_model().most_similar_cosmul(positive=positive, negative=negative) -def doesnt_match(listdata): - return get_model().doesnt_match(listdata) -def similarity(word1,word2): - return get_model().similarity(word1,word2) -def sentence_vectorizer(ss,dim=300,use_mean=False): - s = word_tokenize(ss) - vec = np.zeros((1,dim)) - for word in s: - if word in get_model().wv.index2word: - vec+= get_model().wv.word_vec(word) - else: pass - if use_mean: vec /= len(s) - return(vec) - -def about(): - return ''' - thai2vec - State-of-the-Art Language Modeling, Text Feature Extraction and Text Classification in Thai Language. - Created as part of pyThaiNLP with ULMFit implementation from fast.ai - - Development : Charin Polpanumas - GitHub : https://github.com/cstorm125/thai2vec - '''
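
Note on the custom-dictionary tokenization API whose .ipynb_checkpoints duplicate is deleted above: the checkpoint copy of `pythainlp/tokenize/__init__.py` documents `create_custom_dict_trie(custom_dict_source)` and `dict_word_tokenize(text, custom_dict_trie, engine)` only through Thai docstrings, and no example file in this diff exercises them. The sketch below is illustrative only, assuming the live `pythainlp.tokenize` module exposes the same functions and signatures as the deleted checkpoint shown above; exact output may vary between PyThaiNLP releases.

```python
# -*- coding: utf-8 -*-
# Illustrative sketch (not part of this diff): tokenizing with a user-supplied
# dictionary, following the signatures in the deleted tokenize/__init__
# checkpoint. Assumes the live pythainlp.tokenize module exposes the same API.

from pythainlp.tokenize import create_custom_dict_trie, dict_word_tokenize

# Build a trie from an in-memory word list (per the docstring, a path to a
# word-per-line file is also accepted).
trie = create_custom_dict_trie(custom_dict_source=["ฉัน", "รัก", "ภาษาไทย"])

print(dict_word_tokenize("ฉันรักภาษาไทย", trie, engine="newmm"))
# Expected with this dictionary: ['ฉัน', 'รัก', 'ภาษาไทย']
```

The same `engine` values listed in the `word_tokenize` docstring (`newmm`, `mm`, `longest-matching`, `wordcutpy`) are accepted here; `newmm` is the default used throughout the new `examples/tokenize.py`.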