add simple prometheus metrics collection, with a prometheus / grafana… #64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Python Tests on M1 Mac | |
on: | |
push: | |
branches: [ main ] | |
pull_request: | |
branches: [ main ] | |
jobs: | |
unit_test: | |
runs-on: macos-14 | |
steps: | |
- uses: actions/checkout@v2 | |
- name: Set up Python | |
uses: actions/setup-python@v2 | |
with: | |
python-version: '3.12' | |
- name: Cache huggingface hub models | |
uses: actions/cache@v3 | |
with: | |
path: ~/.cache/huggingface/hub | |
key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }} | |
- name: Install dependencies | |
run: | | |
python3 -m pip install --upgrade pip | |
pip install . | |
- name: Run tests | |
run: | | |
# Check if cached files are present | |
ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true | |
# Run unit tests | |
METAL_XCODE=1 python3 -m exo.inference.test_inference_engine | |
discovery_integration_test: | |
runs-on: macos-latest | |
steps: | |
- uses: actions/checkout@v2 | |
- name: Set up Python | |
uses: actions/setup-python@v2 | |
with: | |
python-version: '3.x' | |
- name: Install dependencies | |
run: | | |
python3 -m pip install --upgrade pip | |
pip install . | |
- name: Run discovery integration test | |
run: | | |
# Start first instance | |
DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 & | |
PID1=$! | |
# Start second instance | |
DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 & | |
PID2=$! | |
# Wait for discovery | |
sleep 10 | |
# Stop both instances | |
kill $PID1 $PID2 | |
# Check outputs | |
if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then | |
echo "Test passed: Both instances discovered each other" | |
exit 0 | |
else | |
echo "Test failed: Devices did not discover each other" | |
echo "Output of first instance:" | |
cat output1.log | |
echo "Output of second instance:" | |
cat output2.log | |
exit 1 | |
fi | |
chatgpt_api_integration_test: | |
runs-on: macos-latest | |
steps: | |
- uses: actions/checkout@v2 | |
- name: Set up Python | |
uses: actions/setup-python@v2 | |
with: | |
python-version: '3.x' | |
- name: Cache huggingface hub models | |
uses: actions/cache@v3 | |
with: | |
path: ~/.cache/huggingface/hub | |
key: ${{ runner.os }}-huggingface-hub-${{ hashFiles('~/.cache/huggingface/hub/**/*') }}-${{ github.job }} | |
restore-keys: | | |
${{ runner.os }}-huggingface-hub- | |
- name: Cache tinygrad downloaded models | |
uses: actions/cache@v3 | |
with: | |
path: ~/Library/Caches/tinygrad/downloads | |
key: ${{ runner.os }}-tinygrad-downloads-${{ hashFiles('~/Library/Caches/tinygrad/downloads/**/*') }}-${{ github.job }} | |
restore-keys: | | |
${{ runner.os }}-tinygrad-downloads- | |
- name: Install dependencies | |
run: | | |
python3 -m pip install --upgrade pip | |
pip install . | |
- name: Run chatgpt api integration test | |
run: | | |
# Check if cached files are present | |
ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true | |
# Start first instance | |
DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 & | |
PID1=$! | |
# Start second instance | |
DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 & | |
PID2=$! | |
# Wait for discovery | |
sleep 10 | |
# first one to load the model | |
curl -s http://localhost:8000/v1/chat/completions \ | |
-H "Content-Type: application/json" \ | |
-d '{ | |
"model": "llama-3-8b", | |
"messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}], | |
"temperature": 0.7 | |
}' | |
response_1=$(curl -s http://localhost:8000/v1/chat/completions \ | |
-H "Content-Type: application/json" \ | |
-d '{ | |
"model": "llama-3-8b", | |
"messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}], | |
"temperature": 0.7 | |
}') | |
echo "Response 1: $response_1" | |
response_2=$(curl -s http://localhost:8000/v1/chat/completions \ | |
-H "Content-Type: application/json" \ | |
-d '{ | |
"model": "llama-3-8b", | |
"messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}], | |
"temperature": 0.7 | |
}') | |
echo "Response 2: $response_2" | |
# Stop both instances | |
kill $PID1 $PID2 | |
echo "" | |
if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then | |
echo "Test failed: Response does not contain 'Michael Jackson'" | |
echo "Response 1: $response_1" | |
echo "" | |
echo "Response 2: $response_2" | |
exit 1 | |
else | |
echo "Test passed: Response from both nodes contains 'Michael Jackson'" | |
fi |