forked from sagnik/Project_Velocity
fix: Oracle Canvas Metadata and deterministic semantic repair
This commit is contained in:
@@ -29,6 +29,8 @@ class FieldDescriptor:
|
||||
confidence: str
|
||||
description: str
|
||||
notes: str = ""
|
||||
valid_values: tuple[str, ...] = ()
|
||||
examples: tuple[str, ...] = ()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@@ -54,6 +56,115 @@ class ConceptDescriptor:
|
||||
|
||||
CATALOG_VERSION = "velocity_semantic_v2026_04_25_01"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ColumnMetadata:
    """Planner-facing description of one table column.

    Instances are immutable. ``valid_values`` and ``examples`` are always
    stored as tuples of strings: a bare string passed by mistake (for example
    ``examples=("2026-04-18")`` without the trailing comma) is wrapped into a
    one-element tuple, and any other iterable is converted to a tuple. Without
    this, serializing the field with ``list(...)`` would split the string into
    single characters.

    Attributes:
        table: Name of the table that owns the column.
        column: Name of the column.
        topic: Topic key used to select which entries are relevant.
        meaning: Human-readable explanation of what the column holds.
        reliability: Confidence label for the column.
        valid_values: Closed set of allowed values for enum-like columns;
            empty when the column is not enum-like.
        examples: Sample values for illustration.
        usage: Guidance on when and how to use the column.
        avoid: True when the column should not be used as a main filter.
    """

    table: str
    column: str
    topic: str
    meaning: str
    reliability: str
    valid_values: tuple[str, ...] = ()
    examples: tuple[str, ...] = ()
    usage: str = ""
    avoid: bool = False

    def __post_init__(self) -> None:
        """Normalize the tuple-typed fields so they never hold a bare string."""
        for name in ("valid_values", "examples"):
            value = getattr(self, name)
            if isinstance(value, tuple):
                continue
            if isinstance(value, str):
                # A lone string is one value, not an iterable of characters.
                normalized = (value,) if value else ()
            else:
                normalized = tuple(value)
            # The dataclass is frozen, so bypass its __setattr__ guard.
            object.__setattr__(self, name, normalized)
|
||||
|
||||
|
||||
# Closed set of values accepted for intel_qd_scores.score_type.
# Note that "QD" itself is not a member of this set.
VALID_QD_SCORE_TYPES: tuple[str, ...] = (
    "overall", "intent", "engagement", "urgency", "financial_qualification",
)
|
||||
|
||||
|
||||
# Column-level guidance, grouped by topic ("qd_score", "contact_recency",
# "property_interest"). Positional arguments are, in order:
# table, column, topic, meaning, reliability.
COLUMN_METADATA: list[ColumnMetadata] = [
    # --- qd_score -----------------------------------------------------------
    ColumnMetadata(
        "intel_qd_scores",
        "score_type",
        "qd_score",
        "Score family/category. There is no score_type value named QD.",
        Confidence.RELIABLE,
        valid_values=VALID_QD_SCORE_TYPES,
        examples=("overall", "intent", "engagement"),
        usage=(
            "For generic QD score prompts, prefer score_type = 'overall'. "
            "For specific intent/engagement/urgency/financial prompts, use the matching valid value. "
            "Never filter score_type = 'QD'."
        ),
    ),
    ColumnMetadata(
        "intel_qd_scores",
        "current_value",
        "qd_score",
        "Authoritative numeric score value for the selected score_type.",
        Confidence.RELIABLE,
        examples=("98.0", "72.4"),
        usage="Rank, sort, average, or threshold QD-style scores with this column.",
    ),
    ColumnMetadata(
        "intel_qd_scores",
        "computed_at",
        "qd_score",
        "Timestamp when the score was computed.",
        Confidence.RELIABLE,
        # The trailing comma is required: without it this is a plain string,
        # which would later be serialized as a list of single characters.
        examples=("2026-04-18T00:00:00",),
        usage="Use for score freshness, not client contact recency.",
    ),
    # --- contact_recency ----------------------------------------------------
    ColumnMetadata(
        "intel_interactions",
        "happened_at",
        "contact_recency",
        "Primary timestamp for client contact and interaction recency.",
        Confidence.RELIABLE,
        usage="Use for contacted, last contacted, recent contact, activity, and timeline prompts.",
    ),
    ColumnMetadata(
        "read_last_contacted",
        "last_contact_at",
        "contact_recency",
        "Precomputed per-client last contact timestamp.",
        Confidence.RELIABLE,
        usage="Prefer for client-level last-contact summaries when this read model is available.",
    ),
    ColumnMetadata(
        "edge_communication_events",
        "timestamp",
        "contact_recency",
        "Legacy/sparse event timestamp that is not reliable for Oracle CRM recency.",
        Confidence.SPARSE,
        usage="Do not use for contact prompts.",
        avoid=True,
    ),
    ColumnMetadata(
        "crm_property_interests",
        "last_discussed_at",
        "contact_recency",
        "Sparse legacy field; property interest does not prove recent contact.",
        Confidence.SPARSE,
        usage="Do not use as the primary recency filter.",
        avoid=True,
    ),
    # --- property_interest --------------------------------------------------
    ColumnMetadata(
        "crm_property_interests",
        "project_name",
        "property_interest",
        "Human-readable project/property name attached to a client's interest.",
        Confidence.RELIABLE,
        examples=("Atri Surya Toron", "Godrej Elevate"),
        usage="Use ILIKE filters for property/project scoped prompts.",
    ),
    ColumnMetadata(
        "crm_property_interests",
        "interest_level",
        "property_interest",
        "Interest strength label or score imported from CRM enrichment.",
        Confidence.RELIABLE,
        usage="Use with project_name and person_id to rank interested clients or properties.",
    ),
]
|
||||
|
||||
CONCEPTS: list[ConceptDescriptor] = [
|
||||
ConceptDescriptor(
|
||||
concept_id="person_identity",
|
||||
@@ -95,7 +206,14 @@ CONCEPTS: list[ConceptDescriptor] = [
|
||||
authoritative_fields=[
|
||||
FieldDescriptor("intel_qd_scores", "person_id", Confidence.RELIABLE, "FK to crm_people"),
|
||||
FieldDescriptor("intel_qd_scores", "current_value", Confidence.RELIABLE, "Authoritative QD score"),
|
||||
FieldDescriptor("intel_qd_scores", "score_type", Confidence.RELIABLE, "Score family"),
|
||||
FieldDescriptor(
|
||||
"intel_qd_scores",
|
||||
"score_type",
|
||||
Confidence.RELIABLE,
|
||||
"Score family",
|
||||
notes="Valid values are overall, intent, engagement, urgency, financial_qualification. There is no value named QD.",
|
||||
valid_values=VALID_QD_SCORE_TYPES,
|
||||
),
|
||||
FieldDescriptor("intel_qd_scores", "computed_at", Confidence.RELIABLE, "Score timestamp"),
|
||||
],
|
||||
deprecated_fields=[
|
||||
@@ -105,7 +223,9 @@ CONCEPTS: list[ConceptDescriptor] = [
|
||||
],
|
||||
usage_notes=(
|
||||
"When a prompt mentions QD, qualification, desire, or intent score, "
|
||||
"use intel_qd_scores.current_value. Do not substitute engagement_score."
|
||||
"use intel_qd_scores.current_value. Do not substitute engagement_score. "
|
||||
"Do not filter score_type = 'QD'. For generic QD prompts, use score_type = 'overall'. "
|
||||
"Use intent, engagement, urgency, or financial_qualification only when the prompt asks for that specific family."
|
||||
),
|
||||
),
|
||||
ConceptDescriptor(
|
||||
@@ -141,10 +261,10 @@ CONCEPTS: list[ConceptDescriptor] = [
|
||||
description="Per-person last-contact summary materialization.",
|
||||
authoritative_fields=[
|
||||
FieldDescriptor("read_last_contacted", "person_id", Confidence.RELIABLE, "FK to crm_people"),
|
||||
FieldDescriptor("read_last_contacted", "last_contacted_at", Confidence.RELIABLE, "Last contact time"),
|
||||
FieldDescriptor("read_last_contacted", "last_contact_at", Confidence.RELIABLE, "Last contact time"),
|
||||
FieldDescriptor("read_last_contacted", "last_channel", Confidence.RELIABLE, "Last contact channel"),
|
||||
FieldDescriptor("read_last_contacted", "days_since_last_contact", Confidence.RELIABLE, "Recency in days"),
|
||||
FieldDescriptor("read_last_contacted", "staleness_label", Confidence.RELIABLE, "Hot/warm/cold bucket"),
|
||||
FieldDescriptor("read_last_contacted", "days_since_contact", Confidence.RELIABLE, "Recency in days"),
|
||||
FieldDescriptor("read_last_contacted", "interactions_last_90d", Confidence.RELIABLE, "Recent interaction volume"),
|
||||
],
|
||||
deprecated_fields=[
|
||||
FieldDescriptor("crm_property_interests", "last_discussed_at", Confidence.DEPRECATED, "Stale field"),
|
||||
@@ -318,6 +438,8 @@ def _field_to_dict(field: FieldDescriptor) -> dict[str, Any]:
|
||||
"confidence": field.confidence,
|
||||
"description": field.description,
|
||||
**({"notes": field.notes} if field.notes else {}),
|
||||
**({"valid_values": list(field.valid_values)} if field.valid_values else {}),
|
||||
**({"examples": list(field.examples)} if field.examples else {}),
|
||||
}
|
||||
|
||||
|
||||
@@ -351,10 +473,40 @@ def build_semantic_context_for_planner(detected_intents: list[str], *, max_conce
|
||||
if concept.concept_id not in seen:
|
||||
seen.add(concept.concept_id)
|
||||
ordered.append(concept)
|
||||
relevant_topics = set(detected_intents)
|
||||
if "last_contacted" in relevant_topics or "timeline" in relevant_topics:
|
||||
relevant_topics.add("contact_recency")
|
||||
if "interested_clients" in relevant_topics or "inventory" in relevant_topics:
|
||||
relevant_topics.add("property_interest")
|
||||
if "qd_score" in relevant_topics:
|
||||
relevant_topics.add("qd_score")
|
||||
|
||||
column_metadata = [
|
||||
{
|
||||
"table": item.table,
|
||||
"column": item.column,
|
||||
"topic": item.topic,
|
||||
"meaning": item.meaning,
|
||||
"reliability": item.reliability,
|
||||
**({"valid_values": list(item.valid_values)} if item.valid_values else {}),
|
||||
**({"examples": list(item.examples)} if item.examples else {}),
|
||||
**({"usage": item.usage} if item.usage else {}),
|
||||
**({"avoid": item.avoid} if item.avoid else {}),
|
||||
}
|
||||
for item in COLUMN_METADATA
|
||||
if item.topic in relevant_topics or item.avoid
|
||||
]
|
||||
return json.dumps(
|
||||
{
|
||||
"catalog_version": CATALOG_VERSION,
|
||||
"concepts": [concept_to_dict(concept) for concept in ordered[:max_concepts]],
|
||||
"column_metadata": column_metadata,
|
||||
"global_rules": [
|
||||
"Do not invent enum values. Use only valid_values from column_metadata when filtering enum-like columns.",
|
||||
"Queries that return zero rows because of impossible enum filters are invalid plans.",
|
||||
"For contact recency, use read_last_contacted.last_contact_at or intel_interactions.happened_at.",
|
||||
"Do not use fields marked avoid=true for the main business filter.",
|
||||
],
|
||||
},
|
||||
separators=(",", ":"),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user