Speculative classification

In this example, we will use the classification speculation preset to determine the sentiment of the user’s input.

Python
Swift
TypeScript
Rust

Create a new project

uv init demo && cd demo

Install dependencies

uv add uzu

Paste into main.py

import asyncio

from uzu import (
    ChatConfig,
    ChatMessage,
    ChatReplyConfig,
    ChatSpeculationPreset,
    Engine,
    EngineConfig,
    Feature,
    ReasoningEffort,
    SamplingMethod,
)


async def main() -> None:
    engine_config = EngineConfig.create()
    engine = await Engine.create(engine_config)

    model = await engine.model("Qwen/Qwen3-0.6B")
    if model is None:
        raise RuntimeError("Model not found")
    async for update in (await engine.download(model)).iterator():
        print(f"Download progress: {update.progress}")

    feature = Feature(
        "sentiment",
        ["Happy", "Sad", "Angry", "Fearful", "Surprised", "Disgusted"],
    )
    chat_config = ChatConfig.create().with_speculation_preset(ChatSpeculationPreset.Classification(feature))
    session = await engine.chat(model, chat_config)

    text_to_detect_feature = "Today's been awesome! Everything just feels right, and I can't stop smiling."
    prompt = (
        f'Text is: "{text_to_detect_feature}". '
        f"Choose {feature.name} from the list: {', '.join(feature.values)}. "
        "Answer with one word. Don't add a dot at the end."
    )
    messages = [
        ChatMessage.system().with_reasoning_effort(ReasoningEffort.Disabled),
        ChatMessage.user().with_text(prompt),
    ]

    chat_reply_config = ChatReplyConfig.create().with_token_limit(32).with_sampling_method(SamplingMethod.Greedy())
    replies = await session.reply(messages, chat_reply_config)
    if replies:
        reply = replies[0]
        print(f"Prediction: {reply.message.text}")
        print(f"Generated tokens: {reply.stats.tokens_count_output}")


if __name__ == "__main__":
    asyncio.run(main())

Run the snippet

uv run main.py

Create a new SwiftUI project

Add the SDK

Add this package through SPM:

https://github.com/trymirai/uzu.git

Paste the snippet

import Uzu

public func runChatSpeculationClassification() async throws {
    let engineConfig = EngineConfig.create()
    let engine = try await Engine.create(config: engineConfig)
    
    guard let model = try await engine.model(identifier: "Qwen/Qwen3-0.6B") else {
        return
    }
    for try await update in try await engine.download(model: model).iterator() {
        print("Download progress: \(update.progress())")
    }
    
    let feature = Feature(name: "sentiment", values: [
        "Happy",
        "Sad",
        "Angry",
        "Fearful",
        "Surprised",
        "Disgusted",
    ])
    let chatConfig = ChatConfig.create().withSpeculationPreset(speculationPreset: .classification(feature: feature))
    let session = try await engine.chat(model: model, config: chatConfig)
    
    let textToDetectFeature =
            "Today's been awesome! Everything just feels right, and I can't stop smiling."
    let prompt = "Text is: \"\(textToDetectFeature)\". Choose \(feature.name) from the list: \(feature.values.joined(separator: ", ")). Answer with one word. Don't add a dot at the end."
    let messages = [
        ChatMessage.system().withReasoningEffort(reasoningEffort: .disabled),
        ChatMessage.user().withText(text: prompt)
    ]
    
    let chatReplyConfig = ChatReplyConfig.create().withTokenLimit(tokenLimit: 32).withSamplingMethod(samplingMethod: .greedy)
    let replies = try await session.reply(input: messages, config: chatReplyConfig)
    guard let reply = replies.last else {
        return
    }
    
    print("Prediction: \(reply.message.text() ?? "empty")")
    print("Generated tokens: \(reply.stats.tokensCountOutput ?? 0)")
}

Add the snippet call

var body: some View {
    VStack {
        Text("On-device AI")
    }
    .onAppear() {
        Task {
            try await runExampleName()
        }
    }
}

Make a new project folder

mkdir demo && cd demo

Initialize with pnpm

pnpm init

Install dependencies

pnpm add typescript ts-node @types/node -D
pnpm add @trymirai/uzu

Initialize a tsconfig.json

{
    "compilerOptions": {
        "target": "es2020",
        "module": "commonjs",
        "moduleResolution": "node",
        "strict": true,
        "esModuleInterop": true,
        "outDir": "dist",
        "types": [
            "node"
        ]
    },
    "include": [
        "*.ts"
    ]
}

Create main.ts

import { ChatConfig, ChatMessage, ChatReplyConfig, ChatSpeculationPresetClassification, Engine, EngineConfig, Feature, ReasoningEffort, SamplingMethodGreedy } from '@trymirai/uzu';

async function main() {
    let engineConfig = EngineConfig.create();
    let engine = await Engine.create(engineConfig);

    let model = await engine.model('Qwen/Qwen3-0.6B');
    if (!model) {
        throw new Error('Model not found');
    }
    for await (const update of await engine.download(model)) {
        console.log('Download progress:', update.progress);
    }

    const feature = new Feature('sentiment', [
        'Happy',
        'Sad',
        'Angry',
        'Fearful',
        'Surprised',
        'Disgusted',
    ]);
    let chatConfig = ChatConfig.create().withSpeculationPreset(new ChatSpeculationPresetClassification(feature));
    let session = await engine.chat(model, chatConfig);

    const textToDetectFeature =
        "Today's been awesome! Everything just feels right, and I can't stop smiling.";
    const prompt =
        `Text is: "${textToDetectFeature}". Choose ${feature.name} from the list: ${feature.values.join(', ')}. ` +
        "Answer with one word. Don't add a dot at the end.";
    let messages = [
        ChatMessage.system().withReasoningEffort("Disabled" as ReasoningEffort),
        ChatMessage.user().withText(prompt)
    ];

    let chatReplyConfig = ChatReplyConfig.create().withTokenLimit(32).withSamplingMethod(new SamplingMethodGreedy());
    let reply = (await session.reply(messages, chatReplyConfig))[0];

    if (reply) {
        console.log('Prediction: ', reply.message.text);
        console.log('Generated tokens: ', reply.stats.tokensCountOutput);
    }
}

main().catch((error) => {
    console.error(error);
});

Run the snippet

pnpm ts-node main.ts

Create a new project

cargo new demo && cd demo

Install dependencies

cargo add uzu --git https://github.com/trymirai/uzu
cargo add tokio --features full

Paste into src/main.rs

use uzu::{
    engine::{Engine, EngineConfig},
    types::{
        basic::{Feature, ReasoningEffort, SamplingMethod},
        session::chat::{ChatConfig, ChatMessage, ChatReplyConfig, ChatSpeculationPreset},
    },
};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let engine_config = EngineConfig::default();
    let engine = Engine::new(engine_config).await?;

    let model = engine.model("Qwen/Qwen3-0.6B".to_string()).await?.ok_or("Model not found")?;
    let downloader = engine.download(&model).await?;
    while let Some(update) = downloader.next().await {
        println!("Download progress: {}", update.progress());
    }

    let feature = Feature {
        name: "sentiment".to_string(),
        values: vec![
            "Happy".to_string(),
            "Sad".to_string(),
            "Angry".to_string(),
            "Fearful".to_string(),
            "Surprised".to_string(),
            "Disgusted".to_string(),
        ],
    };
    let chat_config = ChatConfig::default().with_speculation_preset(Some(ChatSpeculationPreset::Classification {
        feature: feature.clone(),
    }));
    let session = engine.chat(model, chat_config).await?;

    let text_to_detect_feature = "Today's been awesome! Everything just feels right, and I can't stop smiling.";
    let prompt = format!(
        "Text is: \"{text_to_detect_feature}\". Choose {} from the list: {}. Answer with one word. Don't add a dot at the end.",
        feature.name,
        feature.values.join(", ")
    );
    let messages = vec![
        ChatMessage::system().with_reasoning_effort(ReasoningEffort::Disabled),
        ChatMessage::user().with_text(prompt),
    ];

    let chat_reply_config =
        ChatReplyConfig::default().with_token_limit(Some(32)).with_sampling_method(SamplingMethod::Greedy {});
    let replies = session.reply(messages, chat_reply_config).await?;
    if let Some(reply) = replies.first() {
        println!("Prediction: {}", reply.message.text().unwrap_or_default());
        println!("Generated tokens: {}", reply.stats.tokens_count_output.unwrap_or_default());
    }

    Ok(())
}

Run the snippet

cargo run --release

You can view the stats to see that the answer will be ready immediately after the prefill step, and actual generation won’t even start due to speculative decoding, which significantly improves generation speed.

Overview

Quick start

Speculative classification

Overview

Quick start

Documentation Index