Source code for concepts.pdsketch.planners.optimistic_search_bilevel_legacy

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : optimistic_search_bilevel_legacy.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 09/10/2023
#
# This file is part of Project Concepts.
# Distributed under terms of the MIT license.

from typing import Union, Sequence, Iterable, Tuple, List, Set

import jacinle

from concepts.dsl.dsl_types import QINDEX
from concepts.dsl.expression import ValueOutputExpression, is_and_expr
from concepts.dsl.constraint import ConstraintSatisfactionProblem
from concepts.pdsketch.domain import State
from concepts.pdsketch.executor import PDSketchExecutor
from concepts.pdsketch.operator import OperatorApplier, OperatorApplicationExpression
from concepts.pdsketch.regression_rule import AchieveExpression
from concepts.pdsketch.regression_utils import ground_fol_expression, ground_operator_application_expression, surface_fol_downcast
from concepts.pdsketch.crow.crow_state import TotallyOrderedPlan, PartiallyOrderedPlan
from concepts.pdsketch.planners.optimistic_search_bilevel_utils import extract_bounded_variables_from_nonzero, extract_bounded_variables_from_nonzero_dc

__all__ = [
    'enumerate_possible_symbolic_plans_regression_c_v1', 'gen_applicable_regression_rules_v1',
    'enumerate_possible_symbolic_plans_regression_c_v2', 'gen_applicable_regression_rules_v2'
]


[docs] def enumerate_possible_symbolic_plans_regression_c_v1( executor: PDSketchExecutor, state: State, goal_expr: Union[str, ValueOutputExpression], max_actions: int = 10, ) -> Iterable[Tuple[Sequence[OperatorApplier], ConstraintSatisfactionProblem, State, State]]: """Enumerate all possible plans that can achieve the goal. Args: executor: the executor. state: the initial state. goal_expr: the goal expression. max_actions: the maximum number of actions in a plan. Returns: A list of plans. Each plan is a tuple of (actions, csp, initial_state, final_state). """ if isinstance(goal_expr, str): goal_expr = executor.parse(goal_expr) @jacinle.log_function(verbose=False) def dfs(state, goal_set, maintains, csp, previous_actions, nr_high_level_actions=0): jacinle.log_function.print('Current goal_set', goal_set) rv = min([executor.execute(goal, state=state, bounded_variables=dict()).item() for goal in goal_set]) if rv > 0.5: return [(state, None, previous_actions)] if nr_high_level_actions > max_actions: return [] # jacinle.log_function.print('Current state', state) # import ipdb; ipdb.set_trace() all_possible_plans = list() candidate_regression_rules = gen_applicable_regression_rules_v1(executor, state, goal_set, maintains) jacinle.log_function.print('Candidate regression rules', candidate_regression_rules) if len(candidate_regression_rules) == 0: print('Entering a debug mode: no candidate regression rules.') import ipdb; ipdb.set_trace() return [] for goal_index, rule, bounded_variables in candidate_regression_rules: grounded_subgoals = list() for item in rule.body[:-1]: assert isinstance(item, AchieveExpression) grounded_subgoals.append(ground_fol_expression(item.goal, bounded_variables)) assert isinstance(rule.body[-1], OperatorApplicationExpression) if len(goal_set) == 1: # If there is only one single goal left, we should just directly try this regression rule. max_prefix_length = 0 else: # TODO(Jiayuan Mao @ 2023/09/06): find the last [always] achievement statement. max_prefix_length = len(rule.body) - 1 for prefix_length in range(0, max_prefix_length + 1): # assuming the last item is OperatorApplicationExpression jacinle.log_function.print('Applying rule', rule, 'for', goal_set[goal_index], 'and prefix length', prefix_length) other_goals = goal_set[:goal_index] + goal_set[goal_index + 1:] + grounded_subgoals[:prefix_length] if len(other_goals) > 0: # jacinle.log_function.print('Other goals', other_goals) possible_branches = dfs(state, other_goals, maintains, csp, previous_actions, nr_high_level_actions=nr_high_level_actions + 1) else: possible_branches = [(state, csp, previous_actions)] if len(possible_branches) == 0: jacinle.log_function.print('Prefix planning failed!!! Stop.') # If it's not possible to achieve the subset of goals, then it's not possible to achieve the whole goal set. # Therefore, this is a break, not a continue. break for i in range(prefix_length, len(rule.body)): item = rule.body[i] next_possible_branches = list() jacinle.log_function.print(f'Now enter item #{i}: {item}') for cur_state, cur_csp, actions in possible_branches: if isinstance(item, AchieveExpression): subgoal = grounded_subgoals[i] # jacinle.log_function.print('Achieve subgoal', subgoal) next_possible_branches.extend(dfs(cur_state, [subgoal], maintains, None, actions, nr_high_level_actions=nr_high_level_actions + 1)) elif isinstance(item, OperatorApplicationExpression): cur_action = ground_operator_application_expression(item, bounded_variables) succ, cur_state = executor.apply(cur_action, cur_state, csp=None, clone=True) jacinle.log_function.print('Applying action', cur_action, 'with result', succ) if succ: next_possible_branches.append((cur_state, None, actions + [cur_action])) else: raise TypeError() possible_branches = next_possible_branches # all_possible_plans.extend(possible_branches) found_plan = False for plan in possible_branches: rv = min([executor.execute(goal, state=plan[0], bounded_variables=dict()).item() for goal in goal_set]) if rv > 0.5: jacinle.log_function.print('Found plan', plan[2]) found_plan = True all_possible_plans.append(plan) else: jacinle.log_function.print('Plan', plan[2], 'failed') if found_plan: break # TODO(Jiayuan Mao @ 2023/09/06): think about what's the semantics for this early return. # The idea is that, as soon as we found a plan using one of the regression rules, we return it, and don't worry about the other rules. # Therefore, it is not guaranteed that the returned plan is the shortest (optimal) one. if len(all_possible_plans) > 0: break if len(all_possible_plans) == 0: jacinle.log_function.print('No possible plans for goal set', goal_set) return [] shortest_plan = min(all_possible_plans, key=lambda x: len(x[2])) return [shortest_plan] if is_and_expr(goal_expr): goal_set = list(goal_expr.arguments) else: goal_set = [goal_expr] candidate_plans = dfs(state, goal_set, None, None, list()) candidate_plans = [(actions, csp, state, final_state) for final_state, csp, actions in candidate_plans] return candidate_plans
[docs] def gen_applicable_regression_rules_v1(executor: PDSketchExecutor, state: State, goal_set: List[ValueOutputExpression], maintains: Set[ValueOutputExpression]): """Generate applicable regression rules for the given goal set and maintains set.""" # TODO(Jiayuan Mao @ 2023/09/04): implement CSP. # TODO(Jiayuan Mao @ 2023/09/04): implement "maintains". candidate_regression_rules = list() for i, goal in enumerate(goal_set): for regression_rule in executor.domain.regression_rules.values(): goal_expr = regression_rule.goal_expression if (variable_binding := surface_fol_downcast(goal_expr, goal)) is None: continue bounded_variables = dict() for v in regression_rule.goal_arguments: bounded_variables[v] = variable_binding[v.name].name if len(regression_rule.binding_arguments) > 0: for v in regression_rule.binding_arguments: bounded_variables[v] = QINDEX if len(regression_rule.preconditions_conjunction.arguments) > 0: rv = executor.execute(regression_rule.preconditions_conjunction, state=state, bounded_variables=bounded_variables) else: rv = None if rv is None: candidate_regression_rules.append((i, regression_rule, bounded_variables)) else: all_forall, candidate_bounded_variables = extract_bounded_variables_from_nonzero(state, rv, regression_rule, default_bounded_variables=bounded_variables) if all_forall and regression_rule.always: candidate_regression_rules.append((i, regression_rule, candidate_bounded_variables[0])) else: candidate_regression_rules.extend([(i, regression_rule, cbv) for cbv in candidate_bounded_variables]) return candidate_regression_rules
[docs] def enumerate_possible_symbolic_plans_regression_c_v2( executor: PDSketchExecutor, state: State, goal_expr: Union[str, ValueOutputExpression], max_depth: int = 10, enable_reordering: bool = True, verbose: bool = False ) -> Iterable[Tuple[Sequence[OperatorApplier], ConstraintSatisfactionProblem, State, State]]: """Enumerate all possible plans that can achieve the goal. Args: executor: the executor. state: the initial state. goal_expr: the goal expression. max_depth: the maximum depth of goal regression search. enable_reordering: whether to enable reordering of the subgoals coming from different chains. verbose: whether to print verbose information. Returns: A list of plans. Each plan is a tuple of (actions, csp, initial_state, final_state). """ if isinstance(goal_expr, str): goal_expr = executor.parse(goal_expr) search_cache = dict() search_stat = {'nr_expanded_nodes': 0} def return_with_cache(goal_set, previous_actions, rv): if len(previous_actions) == 0: goal_str = goal_set.gen_string() if goal_str not in search_cache: search_cache[goal_str] = rv return rv def try_retrive_cache(goal_set, previous_actions): if len(previous_actions) == 0: goal_str = goal_set.gen_string() if goal_str in search_cache: return search_cache[goal_str] return None @jacinle.log_function(verbose=False) def dfs(state: State, goal_set: PartiallyOrderedPlan, maintains, csp, previous_actions, nr_high_level_actions=0): if verbose: jacinle.log_function.print('Current goal_set', goal_set) jacinle.log_function.print('Previous actions', previous_actions) if (rv := try_retrive_cache(goal_set, previous_actions)) is not None: return rv if len(goal_set) == 0: return return_with_cache(goal_set, previous_actions, [(state, None, previous_actions)]) rv = min([executor.execute(goal, state=state, bounded_variables=dict()).item() for goal in goal_set.iter_goals()]) if rv > 0.5: return return_with_cache(goal_set, previous_actions, [(state, None, previous_actions)]) if nr_high_level_actions > max_depth: return return_with_cache(goal_set, previous_actions, []) search_stat['nr_expanded_nodes'] += 1 all_possible_plans = list() candidate_regression_rules = gen_applicable_regression_rules_v2(executor, state, goal_set, maintains) if verbose: jacinle.log_function.print('Candidate regression rules', candidate_regression_rules) if sum(len(r) for _, _, r in candidate_regression_rules) == 0: # print('Entering a debug mode: no candidate regression rules.') # import ipdb; ipdb.set_trace() return return_with_cache(goal_set, previous_actions, []) for chain_index, subgoal_index, this_candidate_regression_rules in candidate_regression_rules: other_goals = goal_set.exclude(chain_index, subgoal_index) cur_goal = goal_set.chains[chain_index].sequence[subgoal_index] if verbose: jacinle.log_function.print('Now trying to excluding goal', cur_goal) if len(other_goals) == 0: other_goals_plans = [(state, None, previous_actions)] else: other_goals_plans = dfs(state, other_goals, maintains, csp, previous_actions, nr_high_level_actions=nr_high_level_actions) for cur_state, cur_csp, cur_actions in other_goals_plans: rv = executor.execute(cur_goal, state=state, bounded_variables=dict()).item() if rv > 0.5: # Found a plan that directly achieve the goal without any regression. all_possible_plans.append((cur_state, cur_csp, cur_actions)) if len(all_possible_plans) > 0: break if len(other_goals) == 0 or not enable_reordering: max_prefix_length = 0 else: max_prefix_length = max(len(rule.body) - 1 for rule, _ in this_candidate_regression_rules) grounded_subgoals_cache = dict() for prefix_length in range(max_prefix_length + 1): for regression_rule_index, (rule, bounded_variables) in enumerate(this_candidate_regression_rules): if prefix_length > len(rule.body) - 1: continue if verbose: jacinle.log_function.print('Applying rule', rule, 'for', cur_goal, 'and prefix length', prefix_length, 'goal set is', goal_set) # construct grounded_subgoals if regression_rule_index in grounded_subgoals_cache: grounded_subgoals = grounded_subgoals_cache[regression_rule_index] else: grounded_subgoals = list() for item in rule.body: if isinstance(item, AchieveExpression): grounded_subgoals.append(ground_fol_expression(item.goal, bounded_variables)) grounded_subgoals_cache[regression_rule_index] = grounded_subgoals if prefix_length == 0: possible_branches = other_goals_plans else: cur_other_goals = other_goals.add_chain(grounded_subgoals[:prefix_length]) possible_branches = dfs(state, cur_other_goals, maintains, csp, previous_actions, nr_high_level_actions=nr_high_level_actions + 1) if len(possible_branches) == 0: if verbose: jacinle.log_function.print('Prefix planning failed!!! Stop.') # If it's not possible to achieve the subset of goals, then it's not possible to achieve the whole goal set. # Therefore, this is a break, not a continue. break for i in range(prefix_length, len(rule.body)): item = rule.body[i] next_possible_branches = list() if verbose: jacinle.log_function.print(f'Now enter item #{i}: {item}') for cur_state, cur_csp, actions in possible_branches: if isinstance(item, AchieveExpression): subgoal = grounded_subgoals[i] # jacinle.log_function.print('Achieve subgoal', subgoal) next_possible_branches.extend(dfs(cur_state, PartiallyOrderedPlan.from_single_goal(subgoal), maintains, None, actions, nr_high_level_actions=nr_high_level_actions + 1)) elif isinstance(item, OperatorApplicationExpression): cur_action = ground_operator_application_expression(item, bounded_variables) succ, cur_state = executor.apply(cur_action, cur_state, csp=None, clone=True) # if verbose: # jacinle.log_function.print('Applying action', cur_action, 'with result', succ) if succ: next_possible_branches.append((cur_state, None, actions + [cur_action])) else: raise TypeError() possible_branches = next_possible_branches # all_possible_plans.extend(possible_branches) found_plan = False for plan in possible_branches: rv = min([executor.execute(goal, state=plan[0], bounded_variables=dict()).item() for goal in goal_set.iter_goals()]) if rv > 0.5: if verbose: jacinle.log_function.print('Found plan', plan[2], 'for goal', goal_set) found_plan = True all_possible_plans.append(plan) else: if verbose: jacinle.log_function.print('Plan', plan[2], 'failed') if found_plan: # TODO(Jiayuan Mao @ 2023/09/06): since we have changed the order of prefix_length for-loop and the regression rule for-loop. # We need to use an additional dictionary to store whether we have found a plan for a particular regression rule. # Right now this doens't matter because we only use the first plan. break # TODO(Jiayuan Mao @ 2023/09/06): think about what's the semantics for this early return. # The idea is that, as soon as we found a plan using one of the regression rules, we return it, and don't worry about the other rules. # Therefore, it is not guaranteed that the returned plan is the shortest (optimal) one. if len(all_possible_plans) > 0: break if len(all_possible_plans) > 0: break if len(all_possible_plans) == 0: if verbose: jacinle.log_function.print('No possible plans for goal set', goal_set) return return_with_cache(goal_set, previous_actions, []) shortest_plan = min(all_possible_plans, key=lambda x: len(x[2])) return return_with_cache(goal_set, previous_actions, [shortest_plan]) if is_and_expr(goal_expr): goal_set = list(goal_expr.arguments) else: goal_set = [goal_expr] goal_set = PartiallyOrderedPlan((TotallyOrderedPlan(goal_set, is_ordered=True),)) candidate_plans = dfs(state, goal_set, None, None, list()) candidate_plans = [(actions, csp, state, final_state) for final_state, csp, actions in candidate_plans] return candidate_plans, search_stat
[docs] def gen_applicable_regression_rules_v2(executor: PDSketchExecutor, state: State, goals: PartiallyOrderedPlan, maintains: Set[ValueOutputExpression]): # TODO(Jiayuan Mao @ 2023/09/10): implement maintains. candidate_regression_rules = list() for chain_index, chain in goals.iter_feasible_chains(): if chain.is_ordered: subgoal_indices = [len(chain) - 1] else: subgoal_indices = list(range(len(chain))) for subgoal_index in subgoal_indices: subgoal = chain.sequence[subgoal_index] this_chain_candidate_regression_rules = list() for regression_rule in executor.domain.regression_rules.values(): goal_expr = regression_rule.goal_expression if (variable_binding := surface_fol_downcast(goal_expr, subgoal)) is None: continue bounded_variables = dict() for v in regression_rule.goal_arguments: bounded_variables[v] = variable_binding[v.name].name if len(regression_rule.binding_arguments) > 0: for v in regression_rule.binding_arguments: bounded_variables[v] = QINDEX if len(regression_rule.preconditions_conjunction.arguments) > 0: if len(regression_rule.binding_arguments) > 4 and len(regression_rule.preconditions_conjunction.arguments) >= 2: rv = extract_bounded_variables_from_nonzero_dc(executor, state, regression_rule, bounded_variables, use_optimistic=False) else: rv = executor.execute(regression_rule.preconditions_conjunction, state=state, bounded_variables=bounded_variables) rv = extract_bounded_variables_from_nonzero(state, rv, regression_rule, default_bounded_variables=bounded_variables, use_optimistic=False) else: rv = None if rv is None: this_chain_candidate_regression_rules = [(regression_rule, bounded_variables)] if regression_rule.always: break else: all_forall, candidate_bounded_variables = rv if all_forall and regression_rule.always: this_chain_candidate_regression_rules = [(regression_rule, candidate_bounded_variables[0])] break else: this_chain_candidate_regression_rules.extend([(regression_rule, cbv) for cbv in candidate_bounded_variables]) candidate_regression_rules.append((chain_index, subgoal_index, this_chain_candidate_regression_rules)) return candidate_regression_rules