For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://modelgates.ai/docs/_mcp/server.

Submit a rerank request

POST https://modelgates.ai/api/v1/rerank Content-Type: application/json

Submits a rerank request to the rerank router

Reference: https://modelgates.ai/docs/api/api-reference/rerank/create-rerank

OpenAPI Specification

yaml

openapi: 3.1.0info:  title: ModelGates API  version: 1.0.0paths:  /rerank:    post:      operationId: create-rerank      summary: Submit a rerank request      description: Submits a rerank request to the rerank router      tags:        - subpackage_rerank      parameters:        - name: Authorization          in: header          description: API key as bearer token in Authorization header          required: true          schema:            type: string      responses:        '200':          description: Rerank response          content:            application/json:              schema:                $ref: '#/components/schemas/Rerank_createRerank_Response_200'        '400':          description: Bad Request - Invalid request parameters or malformed input          content:            application/json:              schema:                $ref: '#/components/schemas/BadRequestResponse'        '401':          description: Unauthorized - Authentication required or invalid credentials          content:            application/json:              schema:                $ref: '#/components/schemas/UnauthorizedResponse'        '402':          description: Payment Required - Insufficient credits or quota to complete request          content:            application/json:              schema:                $ref: '#/components/schemas/PaymentRequiredResponse'        '404':          description: Not Found - Resource does not exist          content:            application/json:              schema:                $ref: '#/components/schemas/NotFoundResponse'        '429':          description: Too Many Requests - Rate limit exceeded          content:            application/json:              schema:                $ref: '#/components/schemas/TooManyRequestsResponse'        '500':          description: Internal Server Error - Unexpected server error          content:            application/json:              schema:                $ref: '#/components/schemas/InternalServerResponse'        '502':          description: Bad Gateway - Provider/upstream API failure          content:            application/json:              schema:                $ref: '#/components/schemas/BadGatewayResponse'        '503':          description: Service Unavailable - Service temporarily unavailable          content:            application/json:              schema:                $ref: '#/components/schemas/ServiceUnavailableResponse'      requestBody:        description: Rerank request input        content:          application/json:            schema:              type: object              properties:                documents:                  type: array                  items:                    type: string                  description: The list of documents to rerank                model:                  type: string                  description: The rerank model to use                provider:                  $ref: >-                    #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProvider                query:                  type: string                  description: The search query to rerank documents against                top_n:                  type: integer                  description: Number of most relevant documents to return              required:                - documents                - model                - queryservers:  - url: https://modelgates.ai/api/v1components:  schemas:    RerankPostRequestBodyContentApplicationJsonSchemaProviderDataCollection:      type: string      enum:        - deny        - allow      description: >-        Data collection setting. If no available model provider meets the        requirement, your request will return an error.         - allow: (default) allow providers which store user data non-transiently        and may train on it         - deny: use only providers which do not collect user data.      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderDataCollection    ProviderName:      type: string      enum:        - AkashML        - AI21        - AionLabs        - Alibaba        - Ambient        - Baidu        - Amazon Bedrock        - Amazon Nova        - Anthropic        - Arcee AI        - AtlasCloud        - Avian        - Azure        - BaseTen        - BytePlus        - Black Forest Labs        - Cerebras        - Chutes        - Cirrascale        - Clarifai        - Cloudflare        - Cohere        - Crucible        - Crusoe        - DeepInfra        - DeepSeek        - DekaLLM        - Featherless        - Fireworks        - Friendli        - GMICloud        - Google        - Google AI Studio        - Groq        - Hyperbolic        - Inception        - Inceptron        - InferenceNet        - Ionstream        - Infermatic        - Io Net        - Inflection        - Liquid        - Mara        - Mancer 2        - Minimax        - ModelRun        - Mistral        - Modular        - Moonshot AI        - Morph        - NCompass        - Nebius        - Nex AGI        - NextBit        - Novita        - Nvidia        - OpenAI        - OpenInference        - Parasail        - Poolside        - Perceptron        - Perplexity        - Phala        - Recraft        - Reka        - Relace        - SambaNova        - Seed        - SiliconFlow        - Sourceful        - StepFun        - Stealth        - StreamLake        - Switchpoint        - Together        - Upstage        - Venice        - WandB        - Xiaomi        - xAI        - Z.AI        - FakeProvider      title: ProviderName    RerankPostRequestBodyContentApplicationJsonSchemaProviderIgnoreItems:      oneOf:        - $ref: '#/components/schemas/ProviderName'        - type: string      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderIgnoreItems    BigNumberUnion:      type: string      description: Price per million prompt tokens      title: BigNumberUnion    RerankPostRequestBodyContentApplicationJsonSchemaProviderMaxPrice:      type: object      properties:        audio:          $ref: '#/components/schemas/BigNumberUnion'        completion:          $ref: '#/components/schemas/BigNumberUnion'        image:          $ref: '#/components/schemas/BigNumberUnion'        prompt:          $ref: '#/components/schemas/BigNumberUnion'        request:          $ref: '#/components/schemas/BigNumberUnion'      description: >-        The object specifying the maximum price you want to pay for this        request. USD price per million tokens, for prompt and completion.      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderMaxPrice    RerankPostRequestBodyContentApplicationJsonSchemaProviderOnlyItems:      oneOf:        - $ref: '#/components/schemas/ProviderName'        - type: string      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderOnlyItems    RerankPostRequestBodyContentApplicationJsonSchemaProviderOrderItems:      oneOf:        - $ref: '#/components/schemas/ProviderName'        - type: string      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderOrderItems    PercentileLatencyCutoffs:      type: object      properties:        p50:          type:            - number            - 'null'          format: double          description: Maximum p50 latency (seconds)        p75:          type:            - number            - 'null'          format: double          description: Maximum p75 latency (seconds)        p90:          type:            - number            - 'null'          format: double          description: Maximum p90 latency (seconds)        p99:          type:            - number            - 'null'          format: double          description: Maximum p99 latency (seconds)      description: >-        Percentile-based latency cutoffs. All specified cutoffs must be met for        an endpoint to be preferred.      title: PercentileLatencyCutoffs    PreferredMaxLatency:      oneOf:        - type: number          format: double        - $ref: '#/components/schemas/PercentileLatencyCutoffs'        - description: Any type      description: >-        Preferred maximum latency (in seconds). Can be a number (applies to p50)        or an object with percentile-specific cutoffs. Endpoints above the        threshold(s) may still be used, but are deprioritized in routing. When        using fallback models, this may cause a fallback model to be used        instead of the primary model if it meets the threshold.      title: PreferredMaxLatency    PercentileThroughputCutoffs:      type: object      properties:        p50:          type:            - number            - 'null'          format: double          description: Minimum p50 throughput (tokens/sec)        p75:          type:            - number            - 'null'          format: double          description: Minimum p75 throughput (tokens/sec)        p90:          type:            - number            - 'null'          format: double          description: Minimum p90 throughput (tokens/sec)        p99:          type:            - number            - 'null'          format: double          description: Minimum p99 throughput (tokens/sec)      description: >-        Percentile-based throughput cutoffs. All specified cutoffs must be met        for an endpoint to be preferred.      title: PercentileThroughputCutoffs    PreferredMinThroughput:      oneOf:        - type: number          format: double        - $ref: '#/components/schemas/PercentileThroughputCutoffs'        - description: Any type      description: >-        Preferred minimum throughput (in tokens per second). Can be a number        (applies to p50) or an object with percentile-specific cutoffs.        Endpoints below the threshold(s) may still be used, but are        deprioritized in routing. When using fallback models, this may cause a        fallback model to be used instead of the primary model if it meets the        threshold.      title: PreferredMinThroughput    Quantization:      type: string      enum:        - int4        - int8        - fp4        - fp6        - fp8        - fp16        - bf16        - fp32        - unknown      title: Quantization    ProviderSort:      type: string      enum:        - price        - throughput        - latency        - exacto      description: The provider sorting strategy (price, throughput, latency)      title: ProviderSort    ProviderSortConfigBy:      type: string      enum:        - price        - throughput        - latency        - exacto      description: The provider sorting strategy (price, throughput, latency)      title: ProviderSortConfigBy    ProviderSortConfigPartition:      type: string      enum:        - model        - none      description: >-        Partitioning strategy for sorting: "model" (default) groups endpoints by        model before sorting (fallback models remain fallbacks), "none" sorts        all endpoints together regardless of model.      title: ProviderSortConfigPartition    ProviderSortConfig:      type: object      properties:        by:          oneOf:            - $ref: '#/components/schemas/ProviderSortConfigBy'            - type: 'null'          description: The provider sorting strategy (price, throughput, latency)        partition:          oneOf:            - $ref: '#/components/schemas/ProviderSortConfigPartition'            - type: 'null'          description: >-            Partitioning strategy for sorting: "model" (default) groups            endpoints by model before sorting (fallback models remain            fallbacks), "none" sorts all endpoints together regardless of model.      description: The provider sorting strategy (price, throughput, latency)      title: ProviderSortConfig    RerankPostRequestBodyContentApplicationJsonSchemaProviderSort:      oneOf:        - $ref: '#/components/schemas/ProviderSort'        - $ref: '#/components/schemas/ProviderSortConfig'        - description: Any type      description: >-        The sorting strategy to use for this request, if "order" is not        specified. When set, no load balancing is performed.      title: RerankPostRequestBodyContentApplicationJsonSchemaProviderSort    RerankPostRequestBodyContentApplicationJsonSchemaProvider:      type: object      properties:        allow_fallbacks:          type:            - boolean            - 'null'          description: >            Whether to allow backup providers to serve requests             - true: (default) when the primary provider (or your custom            providers in "order") is unavailable, use the next best provider.             - false: use only the primary/custom provider, and return the            upstream error if it's unavailable.        data_collection:          oneOf:            - $ref: >-                #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderDataCollection            - type: 'null'          description: >-            Data collection setting. If no available model provider meets the            requirement, your request will return an error.             - allow: (default) allow providers which store user data            non-transiently and may train on it             - deny: use only providers which do not collect user data.        enforce_distillable_text:          type:            - boolean            - 'null'          description: >-            Whether to restrict routing to only models that allow text            distillation. When true, only models where the author has allowed            distillation will be used.        ignore:          type:            - array            - 'null'          items:            $ref: >-              #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderIgnoreItems          description: >-            List of provider slugs to ignore. If provided, this list is merged            with your account-wide ignored provider settings for this request.        max_price:          $ref: >-            #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderMaxPrice          description: >-            The object specifying the maximum price you want to pay for this            request. USD price per million tokens, for prompt and completion.        only:          type:            - array            - 'null'          items:            $ref: >-              #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderOnlyItems          description: >-            List of provider slugs to allow. If provided, this list is merged            with your account-wide allowed provider settings for this request.        order:          type:            - array            - 'null'          items:            $ref: >-              #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderOrderItems          description: >-            An ordered list of provider slugs. The router will attempt to use            the first provider in the subset of this list that supports your            requested model, and fall back to the next if it is unavailable. If            no providers are available, the request will fail with an error            message.        preferred_max_latency:          $ref: '#/components/schemas/PreferredMaxLatency'        preferred_min_throughput:          $ref: '#/components/schemas/PreferredMinThroughput'        quantizations:          type:            - array            - 'null'          items:            $ref: '#/components/schemas/Quantization'          description: A list of quantization levels to filter the provider by.        require_parameters:          type:            - boolean            - 'null'          description: >-            Whether to filter providers to only those that support the            parameters you've provided. If this setting is omitted or set to            false, then providers will receive only the parameters they support,            and ignore the rest.        sort:          $ref: >-            #/components/schemas/RerankPostRequestBodyContentApplicationJsonSchemaProviderSort          description: >-            The sorting strategy to use for this request, if "order" is not            specified. When set, no load balancing is performed.        zdr:          type:            - boolean            - 'null'          description: >-            Whether to restrict routing to only ZDR (Zero Data Retention)            endpoints. When true, only endpoints that do not retain prompts will            be used.      description: Provider routing preferences for the request.      title: RerankPostRequestBodyContentApplicationJsonSchemaProvider    RerankPostResponsesContentApplicationJsonSchemaResultsItemsDocument:      type: object      properties:        text:          type: string          description: The document text      required:        - text      description: The document object containing the original text      title: RerankPostResponsesContentApplicationJsonSchemaResultsItemsDocument    RerankPostResponsesContentApplicationJsonSchemaResultsItems:      type: object      properties:        document:          $ref: >-            #/components/schemas/RerankPostResponsesContentApplicationJsonSchemaResultsItemsDocument          description: The document object containing the original text        index:          type: integer          description: Index of the document in the original input list        relevance_score:          type: number          format: double          description: Relevance score of the document to the query      required:        - document        - index        - relevance_score      description: A single rerank result      title: RerankPostResponsesContentApplicationJsonSchemaResultsItems    RerankPostResponsesContentApplicationJsonSchemaUsage:      type: object      properties:        cost:          type: number          format: double          description: Cost of the request in credits        search_units:          type: integer          description: Number of search units consumed (Cohere billing)        total_tokens:          type: integer          description: Total number of tokens used      description: Usage statistics      title: RerankPostResponsesContentApplicationJsonSchemaUsage    Rerank_createRerank_Response_200:      type: object      properties:        id:          type: string          description: Unique identifier for the rerank response (ORID format)        model:          type: string          description: The model used for reranking        provider:          type: string          description: The provider that served the rerank request        results:          type: array          items:            $ref: >-              #/components/schemas/RerankPostResponsesContentApplicationJsonSchemaResultsItems          description: List of rerank results sorted by relevance        usage:          $ref: >-            #/components/schemas/RerankPostResponsesContentApplicationJsonSchemaUsage          description: Usage statistics      required:        - model        - results      description: Rerank response containing ranked results      title: Rerank_createRerank_Response_200    BadRequestResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for BadRequestResponse      title: BadRequestResponseErrorData    BadRequestResponse:      type: object      properties:        error:          $ref: '#/components/schemas/BadRequestResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Bad Request - Invalid request parameters or malformed input      title: BadRequestResponse    UnauthorizedResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for UnauthorizedResponse      title: UnauthorizedResponseErrorData    UnauthorizedResponse:      type: object      properties:        error:          $ref: '#/components/schemas/UnauthorizedResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Unauthorized - Authentication required or invalid credentials      title: UnauthorizedResponse    PaymentRequiredResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for PaymentRequiredResponse      title: PaymentRequiredResponseErrorData    PaymentRequiredResponse:      type: object      properties:        error:          $ref: '#/components/schemas/PaymentRequiredResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Payment Required - Insufficient credits or quota to complete request      title: PaymentRequiredResponse    NotFoundResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for NotFoundResponse      title: NotFoundResponseErrorData    NotFoundResponse:      type: object      properties:        error:          $ref: '#/components/schemas/NotFoundResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Not Found - Resource does not exist      title: NotFoundResponse    TooManyRequestsResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for TooManyRequestsResponse      title: TooManyRequestsResponseErrorData    TooManyRequestsResponse:      type: object      properties:        error:          $ref: '#/components/schemas/TooManyRequestsResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Too Many Requests - Rate limit exceeded      title: TooManyRequestsResponse    InternalServerResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for InternalServerResponse      title: InternalServerResponseErrorData    InternalServerResponse:      type: object      properties:        error:          $ref: '#/components/schemas/InternalServerResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Internal Server Error - Unexpected server error      title: InternalServerResponse    BadGatewayResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for BadGatewayResponse      title: BadGatewayResponseErrorData    BadGatewayResponse:      type: object      properties:        error:          $ref: '#/components/schemas/BadGatewayResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Bad Gateway - Provider/upstream API failure      title: BadGatewayResponse    ServiceUnavailableResponseErrorData:      type: object      properties:        code:          type: integer        message:          type: string        metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type      required:        - code        - message      description: Error data for ServiceUnavailableResponse      title: ServiceUnavailableResponseErrorData    ServiceUnavailableResponse:      type: object      properties:        error:          $ref: '#/components/schemas/ServiceUnavailableResponseErrorData'        modelgates_metadata:          type:            - object            - 'null'          additionalProperties:            description: Any type        user_id:          type:            - string            - 'null'      required:        - error      description: Service Unavailable - Service temporarily unavailable      title: ServiceUnavailableResponse  securitySchemes:    apiKey:      type: http      scheme: bearer      description: API key as bearer token in Authorization header

SDK Code Examples

python

import requests url = "https://modelgates.ai/api/v1/rerank" payload = {    "documents": ["Paris is the capital of France.", "Berlin is the capital of Germany."],    "model": "cohere/rerank-v3.5",    "query": "What is the capital of France?",    "top_n": 3}headers = {    "Authorization": "Bearer <token>",    "Content-Type": "application/json"} response = requests.post(url, json=payload, headers=headers) print(response.json())

javascript

const url = 'https://modelgates.ai/api/v1/rerank';const options = {  method: 'POST',  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},  body: '{"documents":["Paris is the capital of France.","Berlin is the capital of Germany."],"model":"cohere/rerank-v3.5","query":"What is the capital of France?","top_n":3}'}; try {  const response = await fetch(url, options);  const data = await response.json();  console.log(data);} catch (error) {  console.error(error);}

package main import (	"fmt"	"strings"	"net/http"	"io") func main() { 	url := "https://modelgates.ai/api/v1/rerank" 	payload := strings.NewReader("{\n  \"documents\": [\n    \"Paris is the capital of France.\",\n    \"Berlin is the capital of Germany.\"\n  ],\n  \"model\": \"cohere/rerank-v3.5\",\n  \"query\": \"What is the capital of France?\",\n  \"top_n\": 3\n}") 	req, _ := http.NewRequest("POST", url, payload) 	req.Header.Add("Authorization", "Bearer <token>")	req.Header.Add("Content-Type", "application/json") 	res, _ := http.DefaultClient.Do(req) 	defer res.Body.Close()	body, _ := io.ReadAll(res.Body) 	fmt.Println(res)	fmt.Println(string(body)) }

ruby

require 'uri'require 'net/http' url = URI("https://modelgates.ai/api/v1/rerank") http = Net::HTTP.new(url.host, url.port)http.use_ssl = true request = Net::HTTP::Post.new(url)request["Authorization"] = 'Bearer <token>'request["Content-Type"] = 'application/json'request.body = "{\n  \"documents\": [\n    \"Paris is the capital of France.\",\n    \"Berlin is the capital of Germany.\"\n  ],\n  \"model\": \"cohere/rerank-v3.5\",\n  \"query\": \"What is the capital of France?\",\n  \"top_n\": 3\n}" response = http.request(request)puts response.read_body

java

import com.mashape.unirest.http.HttpResponse;import com.mashape.unirest.http.Unirest; HttpResponse<String> response = Unirest.post("https://modelgates.ai/api/v1/rerank")  .header("Authorization", "Bearer <token>")  .header("Content-Type", "application/json")  .body("{\n  \"documents\": [\n    \"Paris is the capital of France.\",\n    \"Berlin is the capital of Germany.\"\n  ],\n  \"model\": \"cohere/rerank-v3.5\",\n  \"query\": \"What is the capital of France?\",\n  \"top_n\": 3\n}")  .asString();

php

<?phprequire_once('vendor/autoload.php'); $client = new \GuzzleHttp\Client(); $response = $client->request('POST', 'https://modelgates.ai/api/v1/rerank', [  'body' => '{  "documents": [    "Paris is the capital of France.",    "Berlin is the capital of Germany."  ],  "model": "cohere/rerank-v3.5",  "query": "What is the capital of France?",  "top_n": 3}',  'headers' => [    'Authorization' => 'Bearer <token>',    'Content-Type' => 'application/json',  ],]); echo $response->getBody();

csharp

using RestSharp; var client = new RestClient("https://modelgates.ai/api/v1/rerank");var request = new RestRequest(Method.POST);request.AddHeader("Authorization", "Bearer <token>");request.AddHeader("Content-Type", "application/json");request.AddParameter("application/json", "{\n  \"documents\": [\n    \"Paris is the capital of France.\",\n    \"Berlin is the capital of Germany.\"\n  ],\n  \"model\": \"cohere/rerank-v3.5\",\n  \"query\": \"What is the capital of France?\",\n  \"top_n\": 3\n}", ParameterType.RequestBody);IRestResponse response = client.Execute(request);

swift

import Foundation let headers = [  "Authorization": "Bearer <token>",  "Content-Type": "application/json"]let parameters = [  "documents": ["Paris is the capital of France.", "Berlin is the capital of Germany."],  "model": "cohere/rerank-v3.5",  "query": "What is the capital of France?",  "top_n": 3] as [String : Any] let postData = JSONSerialization.data(withJSONObject: parameters, options: []) let request = NSMutableURLRequest(url: NSURL(string: "https://modelgates.ai/api/v1/rerank")! as URL,                                        cachePolicy: .useProtocolCachePolicy,                                    timeoutInterval: 10.0)request.httpMethod = "POST"request.allHTTPHeaderFields = headersrequest.httpBody = postData as Data let session = URLSession.sharedlet dataTask = session.dataTask(with: request as URLRequest, completionHandler: { (data, response, error) -> Void in  if (error != nil) {    print(error as Any)  } else {    let httpResponse = response as? HTTPURLResponse    print(httpResponse)  }}) dataTask.resume()