dkan icon indicating copy to clipboard operation
dkan copied to clipboard

Tech Spike: Model Context Protocol (MCP)

Open TheETupper opened this issue 2 months ago • 7 comments

https://www.drupal.org/project/mcp

https://drupalmcp.io/en/mcp-server/setup-configure/

TheETupper avatar Oct 23 '25 18:10 TheETupper

https://blog.nilenso.com/blog/2025/05/12/mcp-explained-without-hype-or-fluff/

TheETupper avatar Oct 23 '25 18:10 TheETupper

We will likely need a custom plugin: https://drupalmcp.io/en/developers/create-plugin/

zedmonds96 avatar Oct 24 '25 15:10 zedmonds96

I have not attempted to connect to an LLM yet. However, I enabled the MCP module and wrote a simple plugin to test and it works with the sample content datasets.

Steps to reproduce:

  • Install and enable the module ddev composer require 'drupal/mcp:^1.0' ddev drush en mcp -y

  • Follow configuration steps here: https://drupalmcp.io/en/mcp-server/setup-configure/

  • Create a plugin file in /mcp/src/Plugin/Mcp. I followed these steps and with the assistance of copilot wrote this one with two simple "tools":

namespace Drupal\mcp\Plugin\Mcp;

use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Drupal\mcp\Attribute\Mcp;
use Drupal\mcp\Plugin\McpPluginBase;
use Drupal\mcp\ServerFeatures\Tool;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\datastore\DatastoreService;
use Drupal\common\DataResource;

/**
 * MCP plugin providing DKAN datastore query tools.
 */
#[Mcp(
  id: 'dkan-plugin',
  name: new TranslatableMarkup('DKAN Plugin'),
  description: new TranslatableMarkup(
    'Provides tools to query DKAN datastore tables.'
  ),
)]
class DKANPlugin extends McpPluginBase implements ContainerFactoryPluginInterface
{

  /**
   * DKAN datastore service.
   *
   * @var \Drupal\datastore\DatastoreService
   */
  protected DatastoreService $datastoreService;

  /**
   * {@inheritDoc}
   */
  public static function create(
    ContainerInterface $container,
    array $configuration,
                       $plugin_id,
                       $plugin_definition,
  ) {
    $instance = parent::create(
      $container,
      $configuration,
      $plugin_id,
      $plugin_definition,
    );
    $datastoreService = $container->get('dkan.datastore.service');
    if (!$datastoreService instanceof DatastoreService) {
      throw new \RuntimeException('DatastoreService unavailable.');
    }
    $instance->datastoreService = $datastoreService;
    return $instance;
  }

  /**
   * {@inheritdoc}
   */
  public function getTools(): array {
    return [
      new Tool(
        name: 'datastore-summary',
        description: 'Return summary info for a datastore resource identifier.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Resource identifier (distribution UUID).',
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
      new Tool(
        name: 'datastore-columns',
        description: 'Return column machine names (and descriptions) for a distribution UUID.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Distribution UUID.',
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function executeTool(string $toolId, mixed $arguments): array {
    if ($toolId === md5('datastore-summary')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        $summary = $this->datastoreService->summary($identifier);
        return [['type' => 'text', 'text' => json_encode($summary)]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    if ($toolId === md5('datastore-columns')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        [$resolvedId, $version] = DataResource::getIdentifierAndVersion($identifier);
        $storage = $this->datastoreService->getStorage($resolvedId, $version);
        $schema = $storage->getSchema();
        $fields = $schema['fields'] ?? [];
        $columns = [];
        foreach ($fields as $name => $info) {
          $columns[] = [
            'name' => $name,
            'description' => $info['description'] ?? '',
            'type' => $info['type'] ?? ($info['mysql_type'] ?? ''),
          ];
        }
        return [['type' => 'text', 'text' => json_encode([
          'identifier' => $identifier,
          'resolved_identifier' => $resolvedId,
          'version' => $version,
          'columns' => $columns,
        ])]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    throw new \InvalidArgumentException('Tool not found');
  }

}
  • At this point I could test the two tools with drush commands.
# Test datastore-summary
ddev drush eval "\$pm = \Drupal::service('plugin.manager.mcp'); \$p = \$pm->createInstance('dkan-plugin'); \$result = \$p->executeTool(md5('datastore-summary'), ['identifier' => '74d7d9e2-cf61-503b-87f4-2250ac3f3cad']); echo \$result[0]['text'] . PHP_EOL;"

{"numOfColumns":9,"columns":{"record_number":{"type":"serial","unsigned":true,"not null":true,"mysql_type":"int"},"objectid":{"type":"text","mysql_type":"text","description":"\ufeffOBJECTID"},"roadway":{"type":"text","mysql_type":"text","description":"ROADWAY"},"road_side":{"type":"text","mysql_type":"text","description":"ROAD_SIDE"},"lncd":{"type":"text","mysql_type":"text","description":"LNCD"},"descr":{"type":"text","mysql_type":"text","description":"DESCR"},"begin_post":{"type":"text","mysql_type":"text","description":"BEGIN_POST"},"end_post":{"type":"text","mysql_type":"text","description":"END_POST"},"shape_leng":{"type":"text","mysql_type":"text","description":"Shape_Leng"}},"numOfRows":2969}

# Test datastore-columns (formatted)
ddev drush eval "\$pm = \Drupal::service('plugin.manager.mcp'); \$p = \$pm->createInstance('dkan-plugin'); \$result = \$p->executeTool(md5('datastore-columns'), ['identifier' => '74d7d9e2-cf61-503b-87f4-2250ac3f3cad']); \$data = json_decode(\$result[0]['text'], TRUE); echo json_encode(\$data, JSON_PRETTY_PRINT) . PHP_EOL;"
{
    "identifier": "74d7d9e2-cf61-503b-87f4-2250ac3f3cad",
    "resolved_identifier": "3a187a87dc6cd47c48b6b4c4785224b7",
    "version": "1755703129",
    "columns": [
        {
            "name": "record_number",
            "description": "",
            "type": "serial"
        },
        {
            "name": "objectid",
            "description": "\ufeffOBJECTID",
            "type": "text"
        },
        {
            "name": "roadway",
            "description": "ROADWAY",
            "type": "text"
        },
        {
            "name": "road_side",
            "description": "ROAD_SIDE",
            "type": "text"
        },
        {
            "name": "lncd",
            "description": "LNCD",
            "type": "text"
        },
        {
            "name": "descr",
            "description": "DESCR",
            "type": "text"
        },
        {
            "name": "begin_post",
            "description": "BEGIN_POST",
            "type": "text"
        },
        {
            "name": "end_post",
            "description": "END_POST",
            "type": "text"
        },
        {
            "name": "shape_leng",
            "description": "Shape_Leng",
            "type": "text"
        }
    ]
}

zedmonds96 avatar Oct 30 '25 16:10 zedmonds96

@dafeder interested in your thoughts here. Assuming the LLM connection works well, then it should just be able to use the "tools" we provide. We could create tools to calculate aggregate values for charts or we could go as far as letting it do SQL queries.

zedmonds96 avatar Oct 30 '25 16:10 zedmonds96

@dafeder I have a couple of tools here that actually query the datastore tables. This should be enough for an LLM to create visualizations. The instructions from the module devs involve Claude Desktop and since it's not on the CA list of approved tools, I'm not really sure how to proceed with that.

Here is the current state of my plugin:


namespace Drupal\mcp\Plugin\Mcp;

use Drupal\Core\Plugin\ContainerFactoryPluginInterface;
use Drupal\Core\StringTranslation\TranslatableMarkup;
use Drupal\mcp\Attribute\Mcp;
use Drupal\mcp\Plugin\McpPluginBase;
use Drupal\mcp\ServerFeatures\Tool;
use Symfony\Component\DependencyInjection\ContainerInterface;
use Drupal\datastore\DatastoreService;
use Drupal\common\DataResource;
use Drupal\common\Storage\Query;

/**
 * MCP plugin providing DKAN datastore query tools.
 */
#[Mcp(
  id: 'dkan-plugin',
  name: new TranslatableMarkup('DKAN Plugin'),
  description: new TranslatableMarkup(
    'Provides tools to query DKAN datastore tables.'
  ),
)]
class DKANPlugin extends McpPluginBase implements ContainerFactoryPluginInterface
{

  /**
   * DKAN datastore service.
   *
   * @var \Drupal\datastore\DatastoreService
   */
  protected DatastoreService $datastoreService;

  /**
   * {@inheritDoc}
   */
  public static function create(
    ContainerInterface $container,
    array $configuration,
                       $plugin_id,
                       $plugin_definition,
  ) {
    $instance = parent::create(
      $container,
      $configuration,
      $plugin_id,
      $plugin_definition,
    );
    $datastoreService = $container->get('dkan.datastore.service');
    if (!$datastoreService instanceof DatastoreService) {
      throw new \RuntimeException('DatastoreService unavailable.');
    }
    $instance->datastoreService = $datastoreService;
    return $instance;
  }

  /**
   * {@inheritdoc}
   */
  public function getTools(): array {
    return [
      new Tool(
        name: 'datastore-summary',
        description: 'Return summary info for a datastore resource identifier.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Resource identifier (distribution UUID).',
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
      new Tool(
        name: 'datastore-columns',
        description: 'Return column machine names (and descriptions) for a distribution UUID.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Distribution UUID.',
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
      new Tool(
        name: 'datastore-query',
        description: 'Query datastore and return actual data rows. Perfect for creating visualizations like charts and graphs.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Distribution UUID.',
            ],
            'limit' => [
              'type' => 'integer',
              'description' => 'Maximum number of rows to return (default: 100, max: 500).',
              'default' => 100,
            ],
            'offset' => [
              'type' => 'integer',
              'description' => 'Number of rows to skip (default: 0).',
              'default' => 0,
            ],
            'columns' => [
              'type' => 'array',
              'description' => 'Specific columns to return (optional, returns all if omitted).',
              'items' => ['type' => 'string'],
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
      new Tool(
        name: 'datastore-value-counts',
        description: 'Get frequency counts for a categorical column. Useful for bar charts, pie charts, and categorical analysis.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Distribution UUID.',
            ],
            'column' => [
              'type' => 'string',
              'description' => 'Column name to count values for.',
            ],
            'limit' => [
              'type' => 'integer',
              'description' => 'Maximum number of unique values to return (default: 25).',
              'default' => 25,
            ],
          ],
          'required' => ['identifier', 'column'],
        ],
      ),
      new Tool(
        name: 'datastore-stats',
        description: 'Get statistical summary for numerical columns. Useful for understanding data distribution and creating histograms.',
        inputSchema: [
          'type' => 'object',
          'properties' => [
            'identifier' => [
              'type' => 'string',
              'description' => 'Distribution UUID.',
            ],
            'columns' => [
              'type' => 'array',
              'description' => 'Column names to get statistics for (optional, analyzes all numeric columns if omitted).',
              'items' => ['type' => 'string'],
            ],
          ],
          'required' => ['identifier'],
        ],
      ),
    ];
  }

  /**
   * {@inheritdoc}
   */
  public function executeTool(string $toolId, mixed $arguments): array {
    if ($toolId === md5('datastore-summary')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        $summary = $this->datastoreService->summary($identifier);
        return [['type' => 'text', 'text' => json_encode($summary)]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    if ($toolId === md5('datastore-columns')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        [$resolvedId, $version] = DataResource::getIdentifierAndVersion($identifier);
        $storage = $this->datastoreService->getStorage($resolvedId, $version);
        $schema = $storage->getSchema();
        $fields = $schema['fields'] ?? [];
        $columns = [];
        foreach ($fields as $name => $info) {
          $columns[] = [
            'name' => $name,
            'description' => $info['description'] ?? '',
            'type' => $info['type'] ?? ($info['mysql_type'] ?? ''),
          ];
        }
        return [['type' => 'text', 'text' => json_encode([
          'identifier' => $identifier,
          'resolved_identifier' => $resolvedId,
          'version' => $version,
          'columns' => $columns,
        ])]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    if ($toolId === md5('datastore-query')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        [$resolvedId, $version] = DataResource::getIdentifierAndVersion($identifier);
        $storage = $this->datastoreService->getStorage($resolvedId, $version);

        $limit = min($arguments['limit'] ?? 100, 500);
        $offset = $arguments['offset'] ?? 0;
        $requestedColumns = $arguments['columns'] ?? NULL;

        // Build query using Query object
        $query = new Query();
        $query->limit = $limit;
        $query->offset = $offset;

        if ($requestedColumns && is_array($requestedColumns)) {
          foreach ($requestedColumns as $col) {
            $query->filterByProperty($col);
          }
        }

        $results = $storage->query($query);

        return [['type' => 'text', 'text' => json_encode([
          'identifier' => $identifier,
          'count' => count($results),
          'limit' => $limit,
          'offset' => $offset,
          'data' => $results,
        ])]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    if ($toolId === md5('datastore-value-counts')) {
      $identifier = $arguments['identifier'] ?? '';
      $column = $arguments['column'] ?? '';
      if ($identifier === '' || $column === '') {
        throw new \InvalidArgumentException('Identifier and column are required.');
      }
      try {
        [$resolvedId, $version] = DataResource::getIdentifierAndVersion($identifier);
        $storage = $this->datastoreService->getStorage($resolvedId, $version);
        $limit = $arguments['limit'] ?? 25;

        // Query for all values in that column
        $query = new Query();
        $query->filterByProperty($column);

        $results = $storage->query($query);

        // Count occurrences
        $counts = [];
        foreach ($results as $row) {
          $value = is_object($row) ? $row->{$column} : $row[$column];
          $counts[$value] = ($counts[$value] ?? 0) + 1;
        }

        // Sort by count descending
        arsort($counts);

        // Limit results
        $counts = array_slice($counts, 0, $limit, true);

        // Format for visualization
        $data = [];
        foreach ($counts as $value => $count) {
          $data[] = ['value' => $value, 'count' => $count];
        }

        return [['type' => 'text', 'text' => json_encode([
          'identifier' => $identifier,
          'column' => $column,
          'total_unique_values' => count($data),
          'data' => $data,
        ])]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    if ($toolId === md5('datastore-stats')) {
      $identifier = $arguments['identifier'] ?? '';
      if ($identifier === '') {
        throw new \InvalidArgumentException('Identifier is required.');
      }
      try {
        [$resolvedId, $version] = DataResource::getIdentifierAndVersion($identifier);
        $storage = $this->datastoreService->getStorage($resolvedId, $version);
        $schema = $storage->getSchema();
        $fields = $schema['fields'] ?? [];

        // Determine which columns to analyze
        $columnsToAnalyze = $arguments['columns'] ?? NULL;
        if (!$columnsToAnalyze) {
          // Auto-detect numeric columns
          $columnsToAnalyze = [];
          foreach ($fields as $name => $info) {
            $type = $info['type'] ?? $info['mysql_type'] ?? '';
            if (in_array(strtolower($type), ['integer', 'int', 'float', 'double', 'decimal', 'numeric'])) {
              $columnsToAnalyze[] = $name;
            }
          }
        }

        $stats = [];
        foreach ($columnsToAnalyze as $column) {
          // Query for statistics
          $query = new Query();
          $query->filterByProperty($column);

          $results = $storage->query($query);

          $values = [];
          foreach ($results as $row) {
            $value = is_object($row) ? $row->{$column} : $row[$column];
            if (is_numeric($value)) {
              $values[] = (float) $value;
            }
          }

          if (count($values) > 0) {
            sort($values);
            $count = count($values);
            $sum = array_sum($values);
            $mean = $sum / $count;

            // Calculate median
            $median = $count % 2 === 0
              ? ($values[$count / 2 - 1] + $values[$count / 2]) / 2
              : $values[floor($count / 2)];

            // Calculate standard deviation
            $variance = array_sum(array_map(fn($x) => pow($x - $mean, 2), $values)) / $count;
            $stddev = sqrt($variance);

            $stats[$column] = [
              'count' => $count,
              'min' => min($values),
              'max' => max($values),
              'mean' => round($mean, 2),
              'median' => round($median, 2),
              'stddev' => round($stddev, 2),
              'sum' => round($sum, 2),
            ];
          }
        }

        return [['type' => 'text', 'text' => json_encode([
          'identifier' => $identifier,
          'statistics' => $stats,
        ])]];
      }
      catch (\Throwable $e) {
        return [['type' => 'text', 'text' => json_encode(['error' => $e->getMessage()])]];
      }
    }

    throw new \InvalidArgumentException('Tool not found');
  }

}

Testing the new tools:

ddev drush eval "\$pm = \Drupal::service('plugin.manager.mcp'); \$p = \$pm->createInstance('dkan-plugin'); \$result = \$p->executeTool(md5('datastore-query'), ['identifier' => '74d7d9e2-cf61-503b-87f4-2250ac3f3cad', 'limit' => 3]); \$data = json_decode(\$result[0]['text'], TRUE); echo json_encode(\$data, JSON_PRETTY_PRINT) . PHP_EOL;"
{
    "identifier": "74d7d9e2-cf61-503b-87f4-2250ac3f3cad",
    "count": 3,
    "limit": 3,
    "offset": 0,
    "data": [
        {
            "record_number": "1",
            "objectid": "2049",
            "roadway": "75000403",
            "road_side": "R",
            "lncd": "1",
            "descr": "DESIGNATED",
            "begin_post": "0.076",
            "end_post": "0.364",
            "shape_leng": "463.2487"
        },
        {
            "record_number": "2",
            "objectid": "2050",
            "roadway": "88070000",
            "road_side": "R",
            "lncd": "1",
            "descr": "DESIGNATED",
            "begin_post": "0.073",
            "end_post": "5.21",
            "shape_leng": "8267.4337"
        },
        {
            "record_number": "3",
            "objectid": "2051",
            "roadway": "89010000",
            "road_side": "R",
            "lncd": "1",
            "descr": "DESIGNATED",
            "begin_post": "11.266",
            "end_post": "14.64",
            "shape_leng": "5430.1665"
        }
    ]
}
ddev drush eval "\$pm = \Drupal::service('plugin.manager.mcp'); \$p = \$pm->createInstance('dkan-plugin'); \$result = \$p->executeTool(md5('datastore-value-counts'), ['identifier' => '74d7d9e2-cf61-503b-87f4-2250ac3f3cad', 'column' => 'road_side']); \$data = json_decode(\$result[0]['text'], TRUE); echo json_encode(\$data, JSON_PRETTY_PRINT) . PHP_EOL;"
{
    "identifier": "74d7d9e2-cf61-503b-87f4-2250ac3f3cad",
    "column": "road_side",
    "total_unique_values": 3,
    "data": [
        {
            "value": "R",
            "count": 1405
        },
        {
            "value": "L",
            "count": 1369
        },
        {
            "value": "C",
            "count": 195
        }
    ]
}
ddev drush eval "\$pm = \Drupal::service('plugin.manager.mcp'); \$p = \$pm->createInstance('dkan-plugin'); \$result = \$p->executeTool(md5('datastore-stats'), ['identifier' => '74d7d9e2-cf61-503b-87f4-2250ac3f3cad', 'columns' => ['shape_leng']]); \$data = json_decode(\$result[0]['text'], TRUE); echo json_encode(\$data, JSON_PRETTY_PRINT) . PHP_EOL;"
{
    "identifier": "74d7d9e2-cf61-503b-87f4-2250ac3f3cad",
    "statistics": {
        "shape_leng": {
            "count": 2969,
            "min": 6.5931,
            "max": 28741.0863,
            "mean": 1797.07,
            "median": 696.19,
            "stddev": 2934.1,
            "sum": 5335506.42
        }
    }
}

zedmonds96 avatar Dec 08 '25 15:12 zedmonds96

@zedmonds96 thanks, maybe we can all look at this together sometimes soon. In the meantime can we get this into a git repo somewhere?

dafeder avatar Dec 08 '25 15:12 dafeder

@dafeder where should I put it?

zedmonds96 avatar Dec 08 '25 15:12 zedmonds96