Skip to main content

Workflow Overview:

  1. Authentication
    • Establish a secure session by authenticating the user against the API.
    • Ensure all subsequent requests include the required security credentials.
  2. Dataset Upload
    • Upload the tabular training data by sending a POST request to /api/datasets.
    • Assign a unique, identifiable name to the dataset via the name field of the payload; later requests reference the dataset by that name (dataset_name) or by its ID (dataset_id).
  3. Model Training Initiation
    • Trigger the training job for your specific task (e.g., prediction, clustering) via a POST request to /api/models/{task_type}/train.
    • System Response: The API will immediately return a unique model_id and an initial status of PENDING.
  4. Training Status Monitoring
    • Monitor the job progress by polling the status endpoint via a GET request to /api/models/{model_id}.
    • Wait for the training_status to transition to COMPLETE before proceeding; a training_status of FAILED means training did not succeed.
  5. Inference Execution
    • Generate predictions or embeddings by sending a POST request to /api/models/{task_type}/{model_id}/infer.
    • Required Inputs: Include the model_id (from Step 3) and the target dataset_id of an uploaded dataset (repeat Step 2 to upload a new dataset for inference).

Example requests:

With the following environment variables:
export WOODWIDE_API_KEY="sk_your_api_key_here"
export BASE_URL="https://beta.woodwide.ai"

List datasets:

curl -X GET "$BASE_URL/api/datasets" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY"

Create a new dataset:

curl -X POST "$BASE_URL/api/datasets" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: multipart/form-data" \
  -F "file=@mydata.csv;type=text/csv" \
  -F "name=mydata" \
  -F "overwrite=false"

Train a new model (dataset_name must be the name the dataset was created with):

curl -X POST "$BASE_URL/api/models/prediction/train?dataset_name=my_dataset_name" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  -d "model_name=my_model_name" \
  -d "label_column=my_label_column_name" \
  -d "overwrite=false"

Check model training progress (replace {model_id} with the ID returned by the train request):

curl -X GET "$BASE_URL/api/models/{model_id}" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY"

Use the model to make predictions on a dataset (replace {model_id} and {dataset_id} with actual IDs):

curl -X POST "$BASE_URL/api/models/prediction/{model_id}/infer?dataset_id={dataset_id}" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded"

Example script for training and running inference with a prediction model:

#!/bin/sh
#
# Train a prediction model on one CSV file and run inference on another via
# the Woodwide HTTP API.

# API root. An already-exported BASE_URL takes precedence over the default.
# A trailing slash is stripped because every request path used below starts
# with "/api/...", which would otherwise yield "//api/..." URLs.
BASE_URL="${BASE_URL:-https://beta.woodwide.ai}"
BASE_URL="${BASE_URL%/}"
export BASE_URL

# API key. Taken from the environment when already exported; the -k option
# handled by the getopts loop overrides it. No placeholder default is set, so
# the required-argument check can actually detect a missing key.
export WOODWIDE_API_KEY="${WOODWIDE_API_KEY:-}"
# Print the command-line synopsis and the per-option summary on stdout, then
# terminate the script with exit status 1.
usage() {
    cat <<EOF
Usage: $0 -k WOODWIDE_API_KEY -m MODEL_NAME -d DATASET_NAME -r TRAIN_DATA -t TEST_DATA -l LABEL_COLUMN [-o OUTPUT_FILE]
  -k WOODWIDE_API_KEY    Woodwide API Key
  -m MODEL_NAME          Name for the model
  -d DATASET_NAME        Name for the dataset
  -r TRAIN_DATA          Path to the training CSV file
  -t TEST_DATA           Path to the testing CSV file
  -l LABEL_COLUMN        Name of the label column for training
  -o OUTPUT_FILE         (Optional) File path to save inference results
EOF
    exit 1
}

# Check that the external commands this script shells out to are installed.
for _required_tool in jq curl; do
    if ! command -v "$_required_tool" > /dev/null 2>&1; then
        echo "Error: $_required_tool is required but not installed." >&2
        exit 1
    fi
done

# Parse command-line options. The leading ':' in the option string puts
# getopts in silent mode: an unknown option is reported as '?' and a missing
# argument as ':', with the offending option letter in OPTARG.
while getopts ":k:m:d:r:t:l:o:" opt; do
  case "$opt" in
    k) WOODWIDE_API_KEY="$OPTARG" ;;
    m) MODEL_NAME="$OPTARG" ;;
    d) DATASET_NAME="$OPTARG" ;;
    r) TRAIN_DATA="$OPTARG" ;;
    t) TEST_DATA="$OPTARG" ;;
    l) LABEL_COLUMN="$OPTARG" ;;
    o) OUTPUT_FILE="$OPTARG" ;;
    \?) echo "Invalid option: -$OPTARG" >&2; usage ;;
    :) echo "Option -$OPTARG requires an argument." >&2; usage ;;
  esac
done

# Everything except the output file (-o) is mandatory. Diagnostics go to
# stderr, matching the option-parsing errors above.
if [ -z "$WOODWIDE_API_KEY" ] || [ -z "$MODEL_NAME" ] || [ -z "$DATASET_NAME" ] || [ -z "$TRAIN_DATA" ] || [ -z "$TEST_DATA" ] || [ -z "$LABEL_COLUMN" ]; then
    echo "Error: Missing required arguments." >&2
    usage
fi

# Both CSV inputs must exist as regular files before anything is uploaded.
if [ ! -f "$TRAIN_DATA" ]; then
    echo "Error: Training data file '$TRAIN_DATA' not found." >&2
    exit 1
fi

if [ ! -f "$TEST_DATA" ]; then
    echo "Error: Test data file '$TEST_DATA' not found." >&2
    exit 1
fi

# Extract the top-level "id" field from a JSON response, or abort.
#
# Arguments:
#   $1 - JSON response body
#   $2 - short description of the request, used in the error message
# Outputs:
#   stdout - the id and nothing else, so callers can capture it with $(...)
#   stderr - the error message and the server response when no id is found
# Exits with status 1 when the id is not found or is null. When this function
# is invoked inside a command substitution, that exit only ends the subshell,
# so the caller has to check the substitution's status itself.
get_id_or_fail() {
    # var name prefix to minimize risk of name collisions
    _giof_response="$1"
    _giof_context="$2"

    # Extract ID using jq. Returns empty string if not found or null.
    # printf is used instead of echo because some /bin/sh echo builtins expand
    # backslash sequences, which would corrupt JSON string escapes.
    _giof_id=$(printf '%s\n' "$_giof_response" | jq -r '.id // empty')

    if [ -z "$_giof_id" ]; then
        {
            echo "Error: Could not retrieve ID for $_giof_context."
            echo "Response from server:"
            # Pretty-print when the body is JSON; otherwise show it verbatim
            # so a non-JSON error page is still visible.
            printf '%s\n' "$_giof_response" | jq . 2>/dev/null \
                || printf '%s\n' "$_giof_response"
        } >&2
        exit 1
    fi
    printf '%s\n' "$_giof_id"
}

# Run curl with the given arguments and report the total request time.
#
# Arguments:
#   "$@" - passed to curl unchanged
# Outputs:
#   stdout - the response body only
#   stderr - the "Time: <seconds>s" line produced by curl's -w option
# Returns:
#   curl's exit status, or 1 if the temporary file could not be created
curl_with_time() {
    # Without a temp file curl would be run with an empty -o target, so stop
    # before issuing the request.
    _cwt_temp=$(mktemp) || {
        echo "Error: could not create temporary file." >&2
        return 1
    }
    # The body is written to the temp file (-o) so that curl's stdout carries
    # only the -w timing line, which is redirected to stderr. This keeps the
    # function's stdout clean for callers that capture the response.
    curl -w "Time: %{time_total}s\n" -o "$_cwt_temp" "$@" >&2
    _cwt_ret=$?
    cat "$_cwt_temp"
    rm -f "$_cwt_temp"
    return $_cwt_ret
}

echo "Uploading Training Dataset ($TRAIN_DATA) with name '$DATASET_NAME'..."
# --form-string sends the dataset name literally; with plain -F a leading '@'
# or '<' would be read as a file reference and ';' as an attribute separator.
TRAIN_UPLOAD_RESPONSE=$(curl_with_time -s -X POST "$BASE_URL/api/datasets" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: multipart/form-data" \
  -F "file=@$TRAIN_DATA;type=text/csv" \
  --form-string "name=$DATASET_NAME" \
  -F "overwrite=true")

# Validate training upload by checking for ID.
# get_id_or_fail runs inside a command substitution, so its exit only ends
# that subshell; the status has to be checked here to stop the script. Any
# text it wrote to stdout was captured instead of shown, so replay it.
if ! TRAIN_DATASET_ID=$(get_id_or_fail "$TRAIN_UPLOAD_RESPONSE" "training dataset upload"); then
    if [ -n "$TRAIN_DATASET_ID" ]; then
        printf '%s\n' "$TRAIN_DATASET_ID" >&2
    fi
    exit 1
fi
echo "Training Dataset Uploaded. ID: $TRAIN_DATASET_ID"
echo ""

# The test set is stored under the training dataset's name plus "_test".
TEST_DATASET_NAME="${DATASET_NAME}_test"
echo "Uploading Test Dataset ($TEST_DATA) with name '$TEST_DATASET_NAME'..."
TEST_UPLOAD_RESPONSE=$(curl_with_time -s -X POST "$BASE_URL/api/datasets" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: multipart/form-data" \
  -F "file=@$TEST_DATA;type=text/csv" \
  --form-string "name=$TEST_DATASET_NAME" \
  -F "overwrite=true")

# Same subshell-exit handling as for the training upload.
if ! TEST_DATASET_ID=$(get_id_or_fail "$TEST_UPLOAD_RESPONSE" "test dataset upload"); then
    if [ -n "$TEST_DATASET_ID" ]; then
        printf '%s\n' "$TEST_DATASET_ID" >&2
    fi
    exit 1
fi
echo "Test Dataset Uploaded. ID: $TEST_DATASET_ID"
echo ""

echo "Training Model '$MODEL_NAME' using dataset '$DATASET_NAME'..."
# The dataset name travels in the query string, so percent-encode it first;
# spaces, '&', '#' and similar characters would otherwise corrupt the URL.
DATASET_NAME_QUERY=$(jq -rn --arg v "$DATASET_NAME" '$v | @uri') || exit 1
# --data-urlencode encodes the value part of each form field, so model names
# and label columns containing '&', '=', '+' or '%' arrive intact.
TRAIN_RESPONSE=$(curl_with_time -s -X POST "$BASE_URL/api/models/prediction/train?dataset_name=$DATASET_NAME_QUERY" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode "model_name=$MODEL_NAME" \
  --data-urlencode "label_column=$LABEL_COLUMN" \
  -d "overwrite=true")

# get_id_or_fail runs inside a command substitution, so its exit only ends
# that subshell; the status has to be checked here to stop the script. Any
# text it wrote to stdout was captured instead of shown, so replay it.
if ! MODEL_ID=$(get_id_or_fail "$TRAIN_RESPONSE" "model training"); then
    if [ -n "$MODEL_ID" ]; then
        printf '%s\n' "$MODEL_ID" >&2
    fi
    exit 1
fi
echo "Model Training Started. ID: $MODEL_ID"
echo ""

echo "Waiting for Model Training to Complete (ID: $MODEL_ID)..."
START_TIME=$(date +%s)
# Maximum number of seconds to keep polling before giving up.
TIMEOUT=3000

# Poll the model endpoint every 2 seconds until training_status is COMPLETE
# or FAILED, or until TIMEOUT seconds have elapsed.
while true; do
    STATUS_RESPONSE=$(curl_with_time -s -X GET "$BASE_URL/api/models/$MODEL_ID" \
      -H "accept: application/json" \
      -H "Authorization: Bearer $WOODWIDE_API_KEY")

    # printf is used instead of echo when feeding JSON to jq: some /bin/sh
    # echo builtins expand backslash sequences, which would corrupt JSON
    # string escapes and leave the status empty until the timeout.
    TRAINING_STATUS=$(printf '%s\n' "$STATUS_RESPONSE" | jq -r '.training_status // empty')

    if [ "$TRAINING_STATUS" = "COMPLETE" ]; then
        echo "Training Complete."
        printf '%s\n' "$STATUS_RESPONSE" | jq .
        break
    elif [ "$TRAINING_STATUS" = "FAILED" ]; then
        echo "Error: Model Training Failed."
        printf '%s\n' "$STATUS_RESPONSE" | jq .
        exit 1
    fi

    CURRENT_TIME=$(date +%s)
    ELAPSED_TIME=$((CURRENT_TIME - START_TIME))

    if [ "$ELAPSED_TIME" -ge "$TIMEOUT" ]; then
        echo "Error: Model Training Timed Out after ${TIMEOUT} seconds."
        echo "Last Status Response:"
        printf '%s\n' "$STATUS_RESPONSE" | jq .
        exit 1
    fi

    echo "Status: $TRAINING_STATUS. Waiting..."
    sleep 2
done

# Report the wall-clock time from the first poll to completion.
CURRENT_TIME=$(date +%s)
ELAPSED_TIME=$((CURRENT_TIME - START_TIME))

echo "Success: Took ${ELAPSED_TIME} seconds to train model."

echo ""

echo "Running Inference on Model $MODEL_ID with Test Dataset ID $TEST_DATASET_ID..."
# curl_with_time returns curl's exit status, so a transport failure (DNS,
# connection refused, ...) is caught here instead of being reported as a
# successfully saved, empty result.
INFER_RESPONSE=$(curl_with_time -N -s -X POST "$BASE_URL/api/models/prediction/$MODEL_ID/infer?dataset_id=$TEST_DATASET_ID" \
  -H "accept: application/json" \
  -H "Authorization: Bearer $WOODWIDE_API_KEY" \
  -H "Content-Type: application/x-www-form-urlencoded") || {
    echo "Error: Inference request failed." >&2
    exit 1
}

# Write the result to OUTPUT_FILE when -o was given, otherwise print it.
# In both cases pretty-print through jq when the body parses as JSON and fall
# back to the raw body when it does not. printf is used instead of echo so
# backslash escapes in the response are passed through unchanged.
if [ -n "$OUTPUT_FILE" ]; then
    if printf '%s\n' "$INFER_RESPONSE" | jq . > "$OUTPUT_FILE" 2>/dev/null; then
        echo "Inference results saved (JSON formatted) to $OUTPUT_FILE"
    else
        printf '%s\n' "$INFER_RESPONSE" > "$OUTPUT_FILE"
        echo "Inference results saved (Raw output - JSON parsing failed) to $OUTPUT_FILE"
    fi
else
    echo "Inference Response:"
    printf '%s\n' "$INFER_RESPONSE" | jq . 2>/dev/null || printf '%s\n' "$INFER_RESPONSE"
fi
echo ""