Source code for nltk.corpus.reader.wordlist

# Natural Language Toolkit: Word List Corpus Reader
#
# Copyright (C) 2001-2013 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Edward Loper <edloper@gmail.com>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT

from nltk import compat
from nltk.tokenize import line_tokenize

from .util import *
from .api import *

class WordListCorpusReader(CorpusReader):
    """
    List of words, one per line.  Blank lines are ignored.
    """

    def words(self, fileids=None):
        """
        Return the words of the given file(s).

        The raw text obtained from ``raw(fileids)`` is split with
        ``line_tokenize``.

        :param fileids: a single file identifier, a list of file
            identifiers, or None; passed unchanged to ``raw()``.
        :return: the result of ``line_tokenize`` on the raw contents.
        """
        return line_tokenize(self.raw(fileids))

    def raw(self, fileids=None):
        """
        Return the contents of the given file(s), combined with ``concat``.

        :param fileids: a single file identifier (string), a list of file
            identifiers, or None to use ``self._fileids``.
        :return: the result of ``concat`` applied to the list of the
            contents read from each file, in the order given.
        """
        if fileids is None:
            fileids = self._fileids
        elif isinstance(fileids, compat.string_types):
            # A lone identifier is treated as a one-element list.
            fileids = [fileids]

        contents = []
        for fileid in fileids:
            stream = self.open(fileid)
            # Close every stream explicitly, even when read() raises, so
            # that no file handle is left for the garbage collector.
            try:
                contents.append(stream.read())
            finally:
                stream.close()
        return concat(contents)
class SwadeshCorpusReader(WordListCorpusReader):
    """
    Word list reader that can additionally line up the words of several
    files position by position (see ``entries()``).
    """

    def entries(self, fileids=None):
        """
        Line up the word lists of the specified fileids.

        :param fileids: a single file identifier (string), a list of file
            identifiers, or None / empty to use every file reported by
            ``self.fileids()``.
        :return: a list of tuples; the i-th tuple holds the i-th word of
            each specified file, in the order the fileids were given.
            Because the lists are combined with ``zip``, the result is
            as long as the shortest word list.
        """
        if not fileids:
            fileids = self.fileids()
        elif isinstance(fileids, compat.string_types):
            # Without this, a lone string would be iterated character by
            # character and each character treated as a file identifier.
            fileids = [fileids]

        wordlists = [self.words(f) for f in fileids]
        return list(zip(*wordlists))