Source code for nltk.parse.generate

# -*- coding: utf-8 -*-
# Natural Language Toolkit: Generating from a CFG
#
# Copyright (C) 2001-2013 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Peter Ljunglöf <peter.ljunglof@heatherleaf.se>
# URL: <http://nltk.org/>
# For license information, see LICENSE.TXT
#
from __future__ import print_function

import itertools
import sys
from nltk.grammar import Nonterminal, parse_cfg


def generate(grammar, start=None, depth=None, n=None):
    """
    Generates an iterator of all sentences from a CFG.

    :param grammar: The Grammar used to generate sentences.
    :param start: The Nonterminal from which to start generate sentences.
        When omitted (or falsy), ``grammar.start()`` is used.
    :param depth: The maximal depth of the generated tree.
        ``None`` is replaced by ``sys.maxsize``.
    :param n: The maximum number of sentences to return.
        ``None`` means no limit; ``0`` yields no sentences at all.
    :return: An iterator of lists of terminal tokens.
    """
    if not start:
        start = grammar.start()
    if depth is None:
        depth = sys.maxsize

    # Named ``sentences`` rather than ``iter`` to avoid shadowing the builtin.
    sentences = _generate_all(grammar, [start], depth)

    # Compare against None instead of testing truthiness: with a plain
    # ``if n:`` a request for at most 0 sentences would skip the slice and
    # return every sentence.
    if n is not None:
        sentences = itertools.islice(sentences, n)

    return sentences
def _generate_all(grammar, items, depth): if items: for frag1 in _generate_one(grammar, items[0], depth): for frag2 in _generate_all(grammar, items[1:], depth): yield frag1 + frag2 else: yield [] def _generate_one(grammar, item, depth): if depth > 0: if isinstance(item, Nonterminal): for prod in grammar.productions(lhs=item): for frag in _generate_all(grammar, prod.rhs(), depth-1): yield frag else: yield [item] demo_grammar = """ S -> NP VP NP -> Det N PP -> P NP VP -> 'slept' | 'saw' NP | 'walked' PP Det -> 'the' | 'a' N -> 'man' | 'park' | 'dog' P -> 'in' | 'with' """
def demo(N=23):
    """
    Print a numbered list of sentences generated from ``demo_grammar``.

    :param N: The sentence limit; it is shown in the header line and passed
        to ``generate`` as ``n``.
    """
    header = 'Generating the first %d sentences for demo grammar:' % (N,)
    print(header)
    print(demo_grammar)

    cfg = parse_cfg(demo_grammar)
    # Number the sentences starting from 1 for display.
    numbered = enumerate(generate(cfg, n=N), 1)
    for index, tokens in numbered:
        text = ' '.join(tokens)
        print('%3d. %s' % (index, text))
# Run the demonstration when this module is executed as a script.
if __name__ == '__main__':
    demo()