terraform-provider-vsphere icon indicating copy to clipboard operation
terraform-provider-vsphere copied to clipboard

Failed to query disk type

Open pryorda opened this issue 4 years ago • 24 comments

Terraform Version

0.12.28

vSphere Provider Version

1.22.0+

vSphere Version

6.5

Affected Resource(s)

vsphere_virtual_disk

Terraform Configuration Files

# Naming values derived from the input variables.
locals {
  # Base hostname: var.hostname_override when it is set, otherwise
  # "<environment>-<application>-<component>". coalesce() returns its first
  # argument that is neither null nor an empty string.
  host_basename = coalesce(
    var.hostname_override,
    "${var.environment}-${var.application}-${var.component}",
  )
  # "<application>-<component>". Not referenced anywhere else in the
  # configuration shown here.
  role = "${var.application}-${var.component}"
}

# Lookups of existing vSphere objects by name. Everything below the
# datacenter is resolved relative to that datacenter's id.
data "vsphere_datacenter" "datacenter" {
  name = var.vsphere_datacenter
}

# Compute cluster; used by the anti-affinity rule.
data "vsphere_compute_cluster" "cluster" {
  name          = var.vsphere_cluster
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# Datastore that holds the VM files, the CD-ROM ISO and the attached data disk.
data "vsphere_datastore" "datastore" {
  name          = var.vsphere_datastore
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# Resource pool addressed as "<cluster>/Resources".
# NOTE(review): presumably the cluster's default root pool - confirm.
data "vsphere_resource_pool" "pool" {
  name          = "${var.vsphere_cluster}/Resources"
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# Network the VM's interface is connected to.
data "vsphere_network" "network" {
  name          = var.vsphere_network_label
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# Template VM: source of the clone and of the guest id, SCSI type, adapter
# type and root-disk provisioning settings reused by the instance resource.
data "vsphere_virtual_machine" "template" {
  name          = var.vsphere_template
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# DNS records for the instances, managed by the sibling instance_dns module.
module "instance-dns" {
  source = "../instance_dns"

  # What to name the records and where they live.
  hostname          = local.host_basename
  region            = lower(var.region)
  service_provider  = "op"
  create_regionless = false

  # One record per instance, pointing at each VM's default IP address.
  instance_count = var.instance_count
  ips            = vsphere_virtual_machine.instance[*].default_ip_address

  # TTL is 60 when no address list is supplied and 900 when one is.
  ttl             = var.vsphere_network_ipv4_addresses == "" ? 60 : 900
  allow_overwrite = var.allow_dns_overwrite
}

# Instance Resource
# One VM per instance (var.instance_count), cloned from the template and
# customized with a per-instance hostname and network settings.
resource "vsphere_virtual_machine" "instance" {
  # Make sure the standalone data disks exist before a VM tries to attach one.
  depends_on = [vsphere_virtual_disk.data_disk]

  count            = var.instance_count
  # "<host_basename><n>.<region>", lowercased, with n starting at 1.
  name             = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}"
  resource_pool_id = data.vsphere_resource_pool.pool.id
  datastore_id     = data.vsphere_datastore.datastore.id
  # Guest id and SCSI controller type are copied from the template.
  guest_id         = data.vsphere_virtual_machine.template.guest_id
  scsi_type        = data.vsphere_virtual_machine.template.scsi_type

  folder = var.vsphere_folder_path

  num_cpus           = var.vsphere_vcpu
  memory             = var.vsphere_memory
  memory_reservation = var.vsphere_reserved_memory

  enable_disk_uuid           = true
  # 5 when an address list is supplied, 180 otherwise.
  wait_for_guest_net_timeout = var.vsphere_network_ipv4_addresses != "" ? 5 : 180
  sync_time_with_host        = var.sync_time_with_host

  # Prevent attributes from going null in 0.12
  custom_attributes = {}
  extra_config      = {}
  tags              = []

  network_interface {
    network_id     = data.vsphere_network.network.id
    adapter_type   = data.vsphere_virtual_machine.template.network_interface_types[0]
    # var.static_macs is a comma-separated list; element() picks this
    # instance's entry and wraps around if the list is shorter than the count.
    mac_address    = var.static_macs != "" ? element(split(",", var.static_macs), count.index) : ""
    use_static_mac = var.static_macs == "" ? false : true
  }

  // This doesn't actually work and is a workaround for customize spec.
  cdrom {
    datastore_id = data.vsphere_datastore.datastore.id
    path         = "ISOs/os-livecd.iso"
  }

  # Root disk; provisioning flags are copied from the template's first disk.
  disk {
    path             = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}.vmdk"
    label            = "disk0"
    size             = var.root_disk_size
    eagerly_scrub    = data.vsphere_virtual_machine.template.disks[0].eagerly_scrub
    thin_provisioned = data.vsphere_virtual_machine.template.disks[0].thin_provisioned
  }

  # Optional data disk: when any vsphere_virtual_disk.data_disk instances
  # exist, attach the one at this VM's index; otherwise emit no block.
  dynamic "disk" {
    for_each = flatten(length(vsphere_virtual_disk.data_disk) > 0 ? [element(vsphere_virtual_disk.data_disk, count.index)] : [])

    content {
      path         = disk.value.vmdk_path
      label        = "disk1"
      attach       = true
      unit_number  = 1
      datastore_id = data.vsphere_datastore.datastore.id
    }
  }

  clone {
    template_uuid = data.vsphere_virtual_machine.template.id

    customize {
      # "<region>.<service_provider>.example.com" first, followed by the
      # comma-separated entries of var.vsphere_network_domain_search.
      dns_suffix_list = concat(["${lower(var.region)}.${lower(var.service_provider)}.example.com"], split(",", var.vsphere_network_domain_search))
      # The resolver list is only used when an address list is supplied;
      # otherwise an empty string is split.
      dns_server_list = split(
        ",",
        var.vsphere_network_ipv4_addresses != "" ? var.vsphere_network_domain_resolvers : "",
      )

      linux_options {
        host_name = "${lower(local.host_basename)}${count.index + 1}"
        domain    = "${lower(var.region)}.${lower(var.service_provider)}.example.com"
        time_zone = var.vsphere_cluster_timezone
      }

      network_interface {
        # This instance's entry from the comma-separated address list.
        ipv4_address = element(split(",", var.vsphere_network_ipv4_addresses), count.index)
        ipv4_netmask = var.vsphere_network_ipv4_prefix_length
      }

      ipv4_gateway = var.vsphere_network_ipv4_gateway
    }
  }

  # Differences in these arguments never cause an update or replacement.
  lifecycle {
    ignore_changes = [
      disk,
      clone,
      poweron_timeout,
      ide_controller_count,
      sata_controller_count,
    ]
  }

  # Destroy-time hook run through bash; the script body is elided in the
  # original post.
  provisioner "local-exec" {
    when = destroy

    interpreter = ["bash", "-c"]

    command = <<EOT
...
EOT

  }
}

# Standalone thin-provisioned data disks, one per instance, created only when
# var.data_disk_size is greater than zero. The instance resource attaches the
# disk at the matching index.
resource "vsphere_virtual_disk" "data_disk" {
  count      = var.data_disk_size > 0 ? var.instance_count : 0
  size       = var.data_disk_size
  # "<host_basename><n>.<region>.data_disk.vmdk", lowercased, n starting at 1.
  vmdk_path  = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}.data_disk.vmdk"
  datacenter = var.vsphere_datacenter
  datastore  = var.vsphere_datastore
  type       = "thin"

  # Explicitly leaves destroy protection switched off.
  lifecycle {
    prevent_destroy = false
  }
}

# Anti-affinity rule covering every instance VM in the cluster.
# The rule is only created when at least one instance is requested.
resource "vsphere_compute_cluster_vm_anti_affinity_rule" "cluster_vm_anti_affinity_rule" {
  count = var.instance_count > 0 ? 1 : 0

  # "<host_basename>.<region>.<service_provider>", all lowercased.
  name = "${lower(local.host_basename)}.${lower(var.region)}.${lower(var.service_provider)}"

  compute_cluster_id  = data.vsphere_compute_cluster.cluster.id
  virtual_machine_ids = vsphere_virtual_machine.instance[*].id
}

# Fun hack explained here https://github.com/hashicorp/terraform/issues/16580#issuecomment-342573652
# UUIDs of all instance VMs.
output "instance_ids" {
  value = vsphere_virtual_machine.instance[*].uuid
}

# "<vm name>.int.example.com" for every instance. An empty string is appended
# to the name list before formatting, so the result carries one extra
# trailing entry.
output "instances_dns" {
  value = formatlist(
    "%s.%s",
    concat(vsphere_virtual_machine.instance[*].name, [""]),
    "int.example.com",
  )
}

# Default IP address of every instance VM.
output "instance_private_ips" {
  depends_on = [vsphere_virtual_machine.instance]
  value      = vsphere_virtual_machine.instance[*].default_ip_address
}

Debug Output

Panic Output

na

Expected Behavior

Apply successfully like in 1.21.0

Actual Behavior

Dies with failed to query disk type

Steps to Reproduce

terraform apply

References

Bug was introduced with https://github.com/hashicorp/terraform-provider-vsphere/commit/4fd6f8e89ec3cf9320554503689a60e464fb8bd4

Community Note

  • Please vote on this issue by adding a 👍 reaction to the original issue to help the community and maintainers prioritize this request
  • Please do not leave "+1" or other comments that do not add relevant new information or questions, they generate extra noise for issue followers and do not help prioritize the request
  • If you are interested in working on this issue or have submitted a pull request, please leave a comment

pryorda avatar Aug 31 '20 03:08 pryorda

@skevir @bill-rich I added some logging locally and I'm getting a LocalizedMethodFault. What was the purpose of adding the type in the disk read?

pryorda avatar Aug 31 '20 06:08 pryorda

Looks like the issue is specific to 6.5. Is there anyone else who can try?

pryorda avatar Aug 31 '20 23:08 pryorda

Hi @pryorda! Can you please include the debug log that includes the error you're running into?

bill-rich avatar Sep 02 '20 18:09 bill-rich

I can't at this time. I can only provide the line that it's falling into and the error added by the statement I added. It appears 6.5 does not give that attribute back.

pryorda avatar Sep 02 '20 19:09 pryorda

Would it get approved if I did a pull request to make it warn if the attribute doesn't exist rather than dying?

pryorda avatar Sep 08 '20 19:09 pryorda

@pryorda Yes. That would be great. It was added as part of supporting importing disk resources, but it isn't necessary outside of that case.

bill-rich avatar Sep 22 '20 17:09 bill-rich

@bill-rich could you please let us know when a fix will be provided for this?

anupugalavat avatar Oct 15 '20 10:10 anupugalavat

@bill-rich @pryorda @anupugalavat Are you guys able to review my colleague's fix for this in #1447 ? Our work-around at the moment is to pin the vsphere provider to 1.21.0.

prologic avatar Sep 27 '21 03:09 prologic

@appilon, the change in 1447 seems pretty innocuous to me: it logs a warning rather than erroring/failing. What are your thoughts?

Ryan

tenthirtyam avatar Jan 21 '22 03:01 tenthirtyam

I took a quick look at the linked PR, @tenthirtyam. I'm not sure it's appropriate to exit early if no disk type can be read. Why is it that this attribute fails? Do disks not always have a disk type? I worry merging that PR will cause state drift.

appilon avatar Jan 24 '22 20:01 appilon

Good point, @appilon - I'll add this to my list of those to review.

Ryan

tenthirtyam avatar Feb 09 '22 23:02 tenthirtyam

Do you have a simplified Terraform example that experienced this issue - I could use it for testing/reproduction purposes.

Has this issue been experienced in vSphere 7.0?

Ryan Johnson Senior Staff Solutions Architect | Product Engineering @ VMware, Inc.

tenthirtyam avatar Aug 16 '22 05:08 tenthirtyam

@tenthirtyam Sincere apologies, I haven't had the chance to go back to my team and try to get a repro for this. Will do as soon as I can! 👌

prologic avatar Aug 17 '22 01:08 prologic

@tenthirtyam Tried to cut this down to a simple single file as we use multiple modules.

It's an issue on 6.5, 6.7, 7.0.

Only solution for us is to pin provider version to 1.21.0.

# Terraform and provider requirements for the reproduction.
terraform {
  required_version = ">= 0.13"
  required_providers {
    vsphere = {
      source  = "hashicorp/vsphere"
      # No version constraint is active here. Uncomment the pin below to go
      # back to the last release that worked.
      #version = "1.21.0" # last working version
    }
  }
}

# vSphere connection settings, all taken from input variables.
# NOTE(review): var.vsphere_user and var.vsphere_password are not declared in
# the snippet as posted - they must be defined elsewhere.
provider "vsphere" {
  user           = var.vsphere_user
  password       = var.vsphere_password
  vsphere_server = var.vsphere_server
}

# Input variables. None declares a type; the numeric ones default to quoted
# strings and are used in arithmetic and range() calls, which relies on
# Terraform's automatic string-to-number conversion.

# Used as a path segment for the extra disks and as the template name suffix.
variable "environment" {
  default = "test"
}
# Number of VMs to create.
variable "swarm_count" {
  default = "1"
}
# Lower bound for the boot disk size; the template's size wins when larger.
variable "disk_size" {
  default = "1"
}
# Number of extra disks attached to each VM.
variable "additional_disks" {
  default = "3"
}
# Size of each extra disk.
variable "additional_disks_size" {
  default = "1"
}
# Environment-specific settings; the empty defaults are placeholders.
variable "vsphere_server" {
  default = ""
}
variable "vsphere_datacenter" {
  default = ""
}
variable "vsphere_datastore" {
  default = ""
}
variable "vsphere_resource_pool" {
  default = ""
}
variable "vsphere_network" {
  default = ""
}
variable "vsphere_cluster" {
  default = ""
}
# Template name prefix; "-<environment>" is appended when it is looked up.
variable "vsphere_template" {
  default = ""
}
variable "vsphere_folder_path" {
  default = ""
}

locals {
  # Name prefix used in the extra-disk paths and labels.
  node_name = "1191repro"
  # Flat list of { node, disk } objects, one per (disk, node) pair, both
  # numbered from 1. Disks form the outer loop, so the entry for disk d on
  # node n sits at index (d - 1) * swarm_count + (n - 1).
  disk-list = flatten([
    for disk in range(1, var.additional_disks + 1, 1) : [
      for node in range(1, var.swarm_count + 1, 1) : {
        node = node
        disk = disk
      }
    ]
  ])
}


# Lookups of existing vSphere objects by name; everything below the
# datacenter is resolved relative to that datacenter's id.
data "vsphere_datacenter" "dc" {
  name = var.vsphere_datacenter
}
data "vsphere_datastore" "datastore" {
  name          = var.vsphere_datastore
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_resource_pool" "pool" {
  name          = var.vsphere_resource_pool
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_network" "network" {
  name          = var.vsphere_network
  datacenter_id = data.vsphere_datacenter.dc.id
}
# Not referenced by any resource in this snippet.
data "vsphere_compute_cluster" "cluster" {
  name          = var.vsphere_cluster
  datacenter_id = data.vsphere_datacenter.dc.id
}
# Template is looked up as "<vsphere_template>-<environment>".
data "vsphere_virtual_machine" "template" {
  name          = "${var.vsphere_template}-${var.environment}"
  datacenter_id = data.vsphere_datacenter.dc.id
}

# Test VM(s) cloned from the template, each with var.additional_disks
# pre-created disks attached.
resource "vsphere_virtual_machine" "vm" {
  count            = var.swarm_count
  # The standalone disks (and the sleep that depends on them) must exist
  # before the VM attaches them.
  depends_on       = [time_sleep.disk_wait, vsphere_virtual_disk.additional_disk]
  # NOTE(review): fixed name with the same value as local.node_name; with
  # swarm_count > 1 every instance would get the same VM name.
  name             = "1191repro"
  resource_pool_id = data.vsphere_resource_pool.pool.id
  datastore_id     = data.vsphere_datastore.datastore.id
  folder           = var.vsphere_folder_path

  num_cpus                   = 1
  memory                     = 1024
  # Guest id, firmware and SCSI controller type are copied from the template.
  guest_id                   = data.vsphere_virtual_machine.template.guest_id
  firmware                   = data.vsphere_virtual_machine.template.firmware
  scsi_type                  = data.vsphere_virtual_machine.template.scsi_type
  sync_time_with_host        = true
  wait_for_guest_ip_timeout  = 300
  shutdown_wait_timeout      = 1
  force_power_off            = true

  # Boot disk: the larger of the template's first-disk size and var.disk_size;
  # provisioning flags are copied from the template.
  disk {
    label            = "bootdisk"
    size             = data.vsphere_virtual_machine.template.disks.0.size > var.disk_size ? data.vsphere_virtual_machine.template.disks.0.size : var.disk_size
    eagerly_scrub    = data.vsphere_virtual_machine.template.disks.0.eagerly_scrub
    thin_provisioned = data.vsphere_virtual_machine.template.disks.0.thin_provisioned
  }

  # One attached disk per extra disk. disk.value runs from 0 to
  # additional_disks - 1; index disk.value * swarm_count selects the first
  # node's entry for that disk, and only its .disk number is read. The path
  # must match the vmdk_path given to vsphere_virtual_disk.additional_disk.
  dynamic "disk" {
    for_each = range(var.additional_disks)

    content {
      path         = "test/${var.environment}/${local.node_name}${count.index + 1}/disk${local.disk-list[(disk.value * var.swarm_count)].disk}.vmdk"
      label        = "${local.node_name}${count.index + 1}-disk${local.disk-list[(disk.value * var.swarm_count)].disk}"
      attach       = true
      # Unit numbers 1..additional_disks; the boot disk sets none.
      unit_number  = local.disk-list[(disk.value * var.swarm_count)].disk
      datastore_id = data.vsphere_datastore.datastore.id
    }
  }

  network_interface {
    network_id = data.vsphere_network.network.id
  }

  clone {
    template_uuid = data.vsphere_virtual_machine.template.id
  }
}

# Delay placed between the VM and the disks in the dependency graph: the VM
# depends on this resource, and this resource depends on the disks.
# NOTE(review): presumably this makes destroy pause 20s after the VM is
# removed and before the disks are deleted - confirm against the time
# provider documentation.
resource "time_sleep" "disk_wait" {
  depends_on       = [vsphere_virtual_disk.additional_disk]
  destroy_duration = "20s"
}

# Standalone thin-provisioned disks that the "vm" resource attaches through
# its dynamic disk block. Each disk is stored at
# "test/<environment>/<node_name><node>/disk<disk>.vmdk".
resource "vsphere_virtual_disk" "additional_disk" {
  # One disk per (node, disk) pair. local.disk-list holds
  # additional_disks * swarm_count entries, so counting var.additional_disks
  # alone would leave disks uncreated whenever swarm_count is greater than 1.
  # With swarm_count = 1 both expressions give the same number.
  count              = length(local.disk-list)
  size               = var.additional_disks_size
  vmdk_path          = "test/${var.environment}/${local.node_name}${local.disk-list[count.index].node}/disk${local.disk-list[count.index].disk}.vmdk"
  datacenter         = var.vsphere_datacenter
  datastore          = var.vsphere_datastore
  type               = "thin"
  # Create the per-node folder in the datastore if it does not exist yet.
  create_directories = true
}

deebsman avatar Sep 06 '22 06:09 deebsman

Thanks for sharing, @deebsman.

@appilon and I can use this example for further investigation when time permits.

Ryan Johnson Senior Staff Solutions Architect | Product Engineering @ VMware, Inc.

tenthirtyam avatar Sep 06 '22 12:09 tenthirtyam

I'll try to reproduce with the simplified config and debug the issue.

dimitarproynov avatar Jan 05 '23 14:01 dimitarproynov

Thanks, Dimitar! I've reassigned the issue to you during your investigation.

tenthirtyam avatar Jan 05 '23 14:01 tenthirtyam

Hi all,

I've stepped through the code mentioned at 4fd6f8e (vdm.QueryVirtualDiskInfo) with a debugger. There is one call for each additional disk in the above .tf snippet. In all cases vdm.QueryVirtualDiskInfo returned without errors on both 6.7 and 7.0 (latest patch versions).

That being said the virtual machine creation "vm" from the above snippet is stuck in a loop. The vSphere UI shows that the VM has been created and the vim task has completed, but the govmomi code is stuck at virtualmachine.WaitForGuestIP. I assume the issue is in my testbed.

Does this bug reproduce after the VM (from the short snippet above) is cloned or before that?

Regards, _Proynov

dimitarproynov avatar Jan 06 '23 15:01 dimitarproynov

I've circumvented WaitForGuestIP and WaitForGuestNet; alas, "terraform apply" succeeds on both 6.7 and 7.0 (latest patch), so the error does not reproduce.

dimitarproynov avatar Jan 09 '23 09:01 dimitarproynov

Added the label not-reproduced based on Dimitar’s testing using the sample provided previously in the issue.

tenthirtyam avatar Jan 09 '23 14:01 tenthirtyam

@deebsman there has to be something more to it. Maybe some particularity in your environment that contributes to the issue being reproduced.

My testing environment was the following:

VC (6.7 and 7.0.3 latest)

  • Datacenter
    • DRS Cluster
      • Single ESXi Host with a Single VMFS 6 Datastore attached

Your sample clones a VM from template and adds additional disks to it. Maybe there is something special in the VM Template you've used to reproduce the issue.

Regards, _Proynov

dimitarproynov avatar Jan 09 '23 14:01 dimitarproynov

Are there any error statements in the vcenter logs?

You can find them via ssh'ing and looking up the /var/log/vmware/vpxd/vpxd-X.log file.

dimitarproynov avatar Jan 09 '23 15:01 dimitarproynov

@dimitarproynov This appears to be an issue when vsphere_virtual_disk.additional_disk.datastore is a path rather than a name.

resourceVSphereVirtualDiskCreate() quite happily accepts a datastore path because it calls getDatastore() which splits the path and searches by parent/child to find the datastore, and so the disks are created.

resourceVSphereVirtualDiskRead() however passes the datastore as specified to QueryDiskType() which doesn't differentiate between path and name and ends up erroring with Invalid datastore path '[ds cluster/ds name] path/to/file.vmdk'

It works when datastore is a name.

I'm not sure if there's a bug here as the documentation for vsphere_virtual_disk does say that datastore should be set to the name of the datastore. It is a bit inconsistent though as the vsphere_datastore data source accepts a path or a name.

mostuff avatar Feb 15 '23 02:02 mostuff

This issue has begun to happen to me on 2.7.0 against vCenter 7.0.3. It does not happen under 2.6.1. I am definitely passing the datastore by name and the path separately; see below:

# Optional secondary disk
# Created only when var.vmware_dual_disk.provision is true. The datastore is
# given by name and the VMDK path separately; no datacenter argument is set.
resource "vsphere_virtual_disk" "dual" {

  count = var.vmware_dual_disk.provision ? 1 : 0

  datastore          = var.vmware_dual_disk.datastore_name
  vmdk_path          = "/dual_disks/${var.host.hostname}-dual.vmdk"
  create_directories = true
  type               = "thin"
  size               = var.vmware_dual_disk.size

  # This disk is precious!
  # NOTE(review): prevent_destroy is false, so Terraform does not actually
  # guard this disk against destruction - confirm whether true was intended.
  lifecycle {
    prevent_destroy = false
  }
}

Happy to help debug further if desired.

mike-sol avatar Mar 20 '24 23:03 mike-sol