Tutorial 2.2: Learning Lexicon Entries in a Combinatory Categorial Grammar

[1]:

import jacinle
from tabulate import tabulate

[2]:

# From tutorial/1-dsl/1-types-and-functions
from concepts.dsl.dsl_types import ValueType, ConstantType, BOOL, FLOAT32, VectorValueType
from concepts.dsl.dsl_functions import Function, FunctionTyping
from concepts.dsl.function_domain import FunctionDomain

# Types used by the toy scene DSL. `t_concept_name` is the only ConstantType;
# `t_size` is a FLOAT32 vector type (constructed with 3, presumably its
# dimension -- confirm against VectorValueType) exposed under the alias 'size'.
t_item = ValueType('item')
t_item_set = ValueType('item_set')
t_concept_name = ConstantType('concept_name')
t_shape = ValueType('shape')
t_color = ValueType('color')
t_size = VectorValueType(FLOAT32, 3, alias='size')

# Register every type with a fresh domain (same registration order as before).
domain = FunctionDomain()
for dsl_type in (t_item, t_item_set, t_concept_name, t_color, t_shape, t_size):
    domain.define_type(dsl_type)

# Function signatures, written as FunctionTyping[return_type](*argument_types).
function_signatures = [
    ('scene', FunctionTyping[t_item_set]()),
    ('filter_color', FunctionTyping[t_item_set](t_item_set, t_concept_name)),
    ('filter_shape', FunctionTyping[t_item_set](t_item_set, t_concept_name)),
    ('unique', FunctionTyping[t_item](t_item_set)),
    ('color_of', FunctionTyping[t_color](t_item)),
    ('shape_of', FunctionTyping[t_shape](t_item)),
    ('size_of', FunctionTyping[t_size](t_item)),
    ('same_color', FunctionTyping[BOOL](t_color, t_color)),
    ('same_shape', FunctionTyping[BOOL](t_shape, t_shape)),
]
for fn_name, fn_typing in function_signatures:
    domain.define_function(Function(fn_name, fn_typing))

# The last registration is left as a bare trailing expression so that the cell
# still echoes the registered Function object as its output.
domain.define_function(Function('same_size', FunctionTyping[BOOL](t_size, t_size)))

[2]:

Function<same_size(#0: size, #1: size) -> bool>

[3]:

# From tutorial/2-ccg/1-parsing
from concepts.language.ccg.syntax import CCGSyntaxSystem

# Syntax system with two primitive categories, registered in the order 'S', 'N'.
ss = CCGSyntaxSystem()
for primitive_name in ('S', 'N'):
    ss.define_primitive_type(primitive_name)

[4]:

from concepts.language.ccg.search import CCGSyntaxEnumerativeSearcher
# Enumerative searcher over the syntax types of `ss`, configured with 'S' as its only starting symbol.
syntax_searcher = CCGSyntaxEnumerativeSearcher(ss, starting_symbols=['S'])

[5]:

# Enumerate candidate syntax types with argument 2 (presumably the maximum depth -- the
# echoed results report a `depth` of 1 or 2 for every type). Left as the cell's last
# expression so the result list is displayed.
syntax_searcher.gen(2)

[5]:

[CCGSyntaxSearchResult(syntax=CCGPrimitiveSyntaxType<S>, depth=1),
 CCGSyntaxSearchResult(syntax=CCGPrimitiveSyntaxType<N>, depth=1),
 CCGSyntaxSearchResult(syntax=CCGComposedSyntaxType<S/N>, depth=2),
 CCGSyntaxSearchResult(syntax=CCGComposedSyntaxType<S\N>, depth=2),
 CCGSyntaxSearchResult(syntax=CCGComposedSyntaxType<N/N>, depth=2),
 CCGSyntaxSearchResult(syntax=CCGComposedSyntaxType<N\N>, depth=2)]

[6]:

from concepts.language.ccg.search import CCGSemanticsEnumerativeSearcher
# Enumerative searcher over semantic forms, constructed from the function domain defined above.
semantics_searcher = CCGSemanticsEnumerativeSearcher(domain)

[7]:

from concepts.dsl.dsl_types import FormatContext
# Enumerate candidate semantic forms with max_depth=2.
search_results = semantics_searcher.gen(max_depth=2)

table_headers = ['form', 'depth', '#consts', '#vars']

# Use the FormatContext to format the function forms in a "lambda-function" style.
# The rows are built and printed inside the context so the forms are rendered
# while that formatting option is active.
with FormatContext(function_format_lambda=True).as_default():
    table_rows = [
        (
            result.semantics.value,
            result.depth,
            result.nr_constant_arguments,
            result.nr_variable_arguments,
        )
        for result in search_results
    ]
    print(tabulate(table_rows, headers=table_headers))

form                                                depth    #consts    #vars
------------------------------------------------  -------  ---------  -------
scene()                                                 1          0        0
lam #0.lam #1.filter_color(V::#0, V::#1)                1          1        1
lam #0.lam #1.filter_shape(V::#0, V::#1)                1          1        1
lam #0.unique(V::#0)                                    1          0        1
lam #0.color_of(V::#0)                                  1          0        1
lam #0.shape_of(V::#0)                                  1          0        1
lam #0.size_of(V::#0)                                   1          0        1
lam #0.lam #1.same_color(V::#0, V::#1)                  1          0        2
lam #0.lam #1.same_shape(V::#0, V::#1)                  1          0        2
lam #0.lam #1.same_size(V::#0, V::#1)                   1          0        2
lam #0.filter_color(scene(), V::#0)                     2          1        0
lam #0.filter_shape(scene(), V::#0)                     2          1        0
unique(scene())                                         2          0        0
lam #0.lam #1.unique(filter_color(V::#0, V::#1))        2          1        1
lam #0.lam #1.unique(filter_shape(V::#0, V::#1))        2          1        1
lam #0.color_of(unique(V::#0))                          2          0        1
lam #0.shape_of(unique(V::#0))                          2          0        1
lam #0.size_of(unique(V::#0))                           2          0        1
lam #0.lam #1.same_color(color_of(V::#0), V::#1)        2          0        2
lam #0.lam #1.same_color(V::#1, color_of(V::#0))        2          0        2
lam #0.lam #1.same_shape(shape_of(V::#0), V::#1)        2          0        2
lam #0.lam #1.same_shape(V::#1, shape_of(V::#0))        2          0        2
lam #0.lam #1.same_size(size_of(V::#0), V::#1)          2          0        2
lam #0.lam #1.same_size(V::#1, size_of(V::#0))          2          0        2

[8]:

from concepts.language.ccg.grammar import CCG
# Semantic forms for the two known words, built from the domain's functions.
red_semantics = domain.lam(lambda x: domain.f_filter_color(x, 'red'))
object_semantics = domain.lam(lambda: domain.f_scene())

# Seed the grammar with the known lexicon: 'red' has syntax N/N and 'object' has syntax N.
ccg = CCG(domain, ss)
ccg.add_entry_simple('red', ss['N/N'], red_semantics)
ccg.add_entry_simple('object', ss['N'], object_semantics)

[9]:

from concepts.language.ccg.learning import by_parsing
# Learn candidate lexicon entries from the phrase 'blue object'; both searchers
# are run with max_depth=2.
learning_results = by_parsing(
    ccg,
    'blue object',
    syntax_searcher=syntax_searcher,
    semantics_searcher=semantics_searcher,
    syntax_searcher_kwargs={'max_depth': 2},
    semantics_searcher_kwargs={'max_depth': 2},
    bind_concepts=True,  # If true, the algorithm will automatically create "new" concepts!
)

# Prettify the learning results: one row per candidate, showing the learned
# syntax, the learned semantics, and the semantics of the first parsing result.
learning_results_table = []
for result in learning_results:
    assert len(result.words) == 1  # there is only one novel word.

    learned_entry = result.lexicons[0]
    first_parsing = result.parsing_results[0]
    learning_results_table.append(
        (learned_entry.syntax, learned_entry.semantics.value, first_parsing.semantics.value)
    )

table_headers = ['syntax', 'semantics', 'parsing ("blue object")']
print('Learning results for "blue"')
with FormatContext(function_format_lambda=True).as_default():
    print(tabulate(learning_results_table, headers=table_headers))

Learning results for "blue"
syntax    semantics                                 parsing ("blue object")
--------  ----------------------------------------  ----------------------------------------------------------
S/N       lam #0.filter_color(V::#0, blue)          filter_color(scene(), V(blue, dtype=concept_name))
S/N       lam #0.filter_shape(V::#0, blue)          filter_shape(scene(), V(blue, dtype=concept_name))
S/N       lam #0.unique(V::#0)                      unique(scene())
S/N       lam #0.unique(filter_color(V::#0, blue))  unique(filter_color(scene(), V(blue, dtype=concept_name)))
S/N       lam #0.unique(filter_shape(V::#0, blue))  unique(filter_shape(scene(), V(blue, dtype=concept_name)))
S/N       lam #0.color_of(unique(V::#0))            color_of(unique(scene()))
S/N       lam #0.shape_of(unique(V::#0))            shape_of(unique(scene()))
S/N       lam #0.size_of(unique(V::#0))             size_of(unique(scene()))

[10]:

# TODO: Learning with "by_grounding"