# Datasets and Indexes Reference ## Datasets ### Upload File ```python from azure.ai.projects.models import DatasetVersion dataset = project_client.datasets.upload_file( name="my-dataset", version="1.0", file_path="./data/training_data.csv", connection_name="my-storage-connection", ) print(f"Dataset uploaded: {dataset.name} v{dataset.version}") ``` ### Upload Folder ```python import re from azure.ai.projects.models import DatasetVersion dataset = project_client.datasets.upload_folder( name="document-collection", version="2.0", folder="./data/documents/", connection_name="my-storage-connection", file_pattern=re.compile(r"\.(txt|csv|md|json)$", re.IGNORECASE), ) print(f"Folder uploaded: {dataset.name} v{dataset.version}") ``` ### Get Dataset ```python dataset = project_client.datasets.get(name="my-dataset", version="1.0") print(f"Name: {dataset.name}") print(f"Version: {dataset.version}") ``` ### Get Dataset Credentials ```python credentials = project_client.datasets.get_credentials( name="my-dataset", version="1.0", ) # Use credentials to access dataset storage ``` ### List Datasets ```python # List all datasets for dataset in project_client.datasets.list(): print(f"{dataset.name}: {dataset.version}") # List versions of a specific dataset for dataset in project_client.datasets.list_versions(name="my-dataset"): print(f"Version: {dataset.version}") ``` ### Delete Dataset ```python project_client.datasets.delete(name="my-dataset", version="1.0") ``` ## Indexes ### Create or Update Index ```python from azure.ai.projects.models import AzureAISearchIndex index = project_client.indexes.create_or_update( name="my-index", version="1.0", index=AzureAISearchIndex( connection_name="my-ai-search-connection", index_name="products-index", ), ) print(f"Index created: {index.name} v{index.version}") ``` ### Get Index ```python index = project_client.indexes.get(name="my-index", version="1.0") print(f"Name: {index.name}") print(f"Version: {index.version}") ``` ### List Indexes ```python # List all indexes for index in project_client.indexes.list(): print(f"{index.name}: {index.version}") # List versions of a specific index for index in project_client.indexes.list_versions(name="my-index"): print(f"Version: {index.version}") ``` ### Delete Index ```python project_client.indexes.delete(name="my-index", version="1.0") ``` ## Using Indexes with Agents ```python from azure.ai.projects.models import ( AzureAISearchAgentTool, AzureAISearchToolResource, AISearchIndexResource, AzureAISearchQueryType, PromptAgentDefinition, ) # Create index reference index = project_client.indexes.get(name="products-index", version="1.0") # Get connection for the index search_connection = project_client.connections.get("my-ai-search-connection") # Create agent with index agent = project_client.agents.create_version( agent_name="search-agent", definition=PromptAgentDefinition( model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"], instructions="Search the product catalog to answer questions.", tools=[ AzureAISearchAgentTool( azure_ai_search=AzureAISearchToolResource( indexes=[ AISearchIndexResource( project_connection_id=search_connection.id, index_name="products-index", query_type=AzureAISearchQueryType.SEMANTIC, ) ] ) ) ], ), ) ``` ## Version Management Pattern ```python # Semantic versioning for datasets dataset_v1 = project_client.datasets.upload_file( name="training-data", version="1.0.0", file_path="./v1/data.csv", connection_name="storage", ) # Update with new version dataset_v2 = project_client.datasets.upload_file( name="training-data", version="1.1.0", # Minor version bump file_path="./v2/data.csv", connection_name="storage", ) # List all versions versions = list(project_client.datasets.list_versions(name="training-data")) print(f"Available versions: {[v.version for v in versions]}") ```