flower icon indicating copy to clipboard operation
flower copied to clipboard

Miniconda env set up error

Open dr4g0n7ly opened this issue 2 years ago • 2 comments

Describe the bug

I used exactly the same code as the PyTorch Quickstart tutorial at https://flower.dev/docs/framework/quickstart-pytorch.html, but when I run the client I get the error `DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error`.

Steps/Code to Reproduce

client.py

from collections import OrderedDict

import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as transforms from torch.utils.data import DataLoader from torchvision.datasets import CIFAR10

import flwr as fl

# Run on the first CUDA device when PyTorch reports CUDA as available;
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Load CIFAR-10

def load_data():
    """Build the CIFAR-10 train and test data loaders.

    Both splits are read from the current directory (``download=True`` is
    passed to the dataset), converted to tensors, and normalized with mean
    0.5 and standard deviation 0.5 on each of the three channels.

    Returns:
        A ``(trainloader, testloader, num_examples)`` tuple. ``num_examples``
        maps ``"trainset"`` and ``"testset"`` to the number of samples in the
        corresponding dataset.
    """
    channel_stats = (0.5, 0.5, 0.5)
    preprocess = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(channel_stats, channel_stats)]
    )

    train_split = CIFAR10(".", train=True, download=True, transform=preprocess)
    test_split = CIFAR10(".", train=False, download=True, transform=preprocess)

    # Only the training loader shuffles; both use batches of 32.
    train_batches = DataLoader(train_split, batch_size=32, shuffle=True)
    test_batches = DataLoader(test_split, batch_size=32)

    sizes = {"trainset": len(train_split), "testset": len(test_split)}
    return train_batches, test_batches, sizes

# Train the network on the training set

def train(net, trainloader, epochs):
    """Train ``net`` in place for ``epochs`` passes over ``trainloader``.

    Uses cross-entropy loss and SGD (lr=0.001, momentum=0.9). Every batch is
    moved to ``DEVICE`` before the forward pass. Nothing is returned.

    Args:
        net: The model to optimize; its parameters are updated in place.
        trainloader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of full passes over ``trainloader``.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for _epoch in range(epochs):
        for batch in trainloader:
            inputs, targets = batch
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)

            sgd.zero_grad()
            batch_loss = loss_fn(net(inputs), targets)
            batch_loss.backward()
            sgd.step()

# Validate the network on the entire test set

def test(net, testloader):
    """Evaluate ``net`` on every batch of ``testloader`` without gradients.

    Args:
        net: The model to evaluate.
        testloader: Iterable of batches whose first two items are the images
            and the labels.

    Returns:
        A ``(loss, accuracy)`` tuple: the sum of the per-batch cross-entropy
        losses, and the fraction of samples whose highest-scoring class
        equals the label.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    n_correct = 0
    n_seen = 0
    running_loss = 0.0

    with torch.no_grad():
        for batch in testloader:
            inputs = batch[0].to(DEVICE)
            targets = batch[1].to(DEVICE)
            logits = net(inputs)
            running_loss += loss_fn(logits, targets).item()
            # torch.max over dim 1 yields (values, indices); the indices are
            # the predicted class ids.
            predicted = torch.max(logits.data, 1)[1]
            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()

    return running_loss, n_correct / n_seen

# Model

class Net(nn.Module):
    """Small convolutional classifier with 10 output logits.

    Two conv + ReLU + max-pool stages are followed by three fully connected
    layers. The flatten step expects 16 feature maps of size 5x5, which is
    what a 3-channel 32x32 input produces (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self) -> None:
        # The constructor must be spelled ``__init__`` (and must call
        # ``nn.Module.__init__``); a method named ``init`` is never run on
        # construction, so no layers would be registered.
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images.

        Args:
            x: Input batch; the first convolution requires 3 channels.

        Returns:
            A tensor with 10 values per flattened sample.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten each sample's 16 x 5 x 5 feature maps for the linear layers.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load model and data

# Build the model on the selected device first, then create the CIFAR-10
# loaders and the per-split example counts.
net = Net().to(DEVICE)
(trainloader, testloader, num_examples) = load_data()

class CifarClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates the module-level ``net``."""

    def get_parameters(self, config):
        """Return every ``net`` state-dict tensor as a NumPy array, in order."""
        return [tensor.cpu().numpy() for tensor in net.state_dict().values()]

    def set_parameters(self, parameters):
        """Load ``parameters`` into ``net``.

        The arrays are paired positionally with the keys of ``net``'s current
        state dict and loaded with ``strict=True``.
        """
        new_state = OrderedDict()
        for key, array in zip(net.state_dict().keys(), parameters):
            new_state[key] = torch.tensor(array)
        net.load_state_dict(new_state, strict=True)

    def fit(self, parameters, config):
        """Train for one epoch starting from ``parameters``.

        Returns the updated parameters, the training-set size, and an empty
        metrics dict.
        """
        self.set_parameters(parameters)
        train(net, trainloader, epochs=1)
        updated = self.get_parameters(config={})
        return updated, num_examples["trainset"], {}

    def evaluate(self, parameters, config):
        """Evaluate ``parameters`` on the test loader.

        Returns the loss, the test-set size, and a metrics dict holding the
        accuracy under the ``"accuracy"`` key.
        """
        self.set_parameters(parameters)
        loss, accuracy = test(net, testloader)
        metrics = {"accuracy": float(accuracy)}
        return float(loss), num_examples["testset"], metrics

# Connect this client to the Flower server and hand it a CifarClient instance.
# NOTE(review): "127:0:01:8080" uses colons where an IPv4 address has dots, and
# the traceback in this report shows gRPC failing DNS resolution for exactly
# this string. server.py listens on port 8080, so the intended address is
# presumably "127.0.0.1:8080" -- confirm against the tutorial.
fl.client.start_numpy_client( server_address="127:0:01:8080", client=CifarClient() )

-------------

server.py

import flwr as fl

# Federated averaging strategy constructed with no arguments.
strategy = fl.server.strategy.FedAvg()

# Start the Flower server on 0.0.0.0:8080 and run three rounds.
fl.server.start_server(
    server_address="0.0.0.0:8080",
    config=fl.server.ServerConfig(num_rounds=3),
    strategy=strategy,
)

Expected Results

Once I run server.py and then client.py, my understanding is that the model should start training on the client.

Actual Results

I get the following error:

Files already downloaded and verified
Files already downloaded and verified
INFO flwr 2023-07-28 19:47:54,862 | grpc.py:50 | Opened insecure gRPC connection (no certificates were passed)
DEBUG flwr 2023-07-28 19:47:54,866 | connection.py:39 | ChannelConnectivity.IDLE
DEBUG flwr 2023-07-28 19:47:54,886 | connection.py:39 | ChannelConnectivity.TRANSIENT_FAILURE
DEBUG flwr 2023-07-28 19:47:55,091 | connection.py:113 | gRPC channel closed
Traceback (most recent call last):
  File "C:\Users\nisha\Documents\GitHub\NLP_project\flower-cifar-10\client.py", line 104, in <module>
    fl.client.start_numpy_client(server_address="127:0:01:8080", client=CifarClient())
  File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\app.py", line 252, in start_numpy_client
    start_client(
  File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\app.py", line 174, in start_client
    server_message = receive()
  File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\grpc_client\connection.py", line 105, in <lambda>
    receive: Callable[[], ServerMessage] = lambda: next(server_message_iterator)
  File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\grpc\_channel.py", line 475, in __next__
    return self._next()
  File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\grpc\_channel.py", line 864, in _next
    raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
        status = StatusCode.UNAVAILABLE
        details = "DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error"
        debug_error_string = "UNKNOWN:DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error {created_time:"2023-07-28T14:17:54.8746447+00:00", grpc_status:14}"
>

dr4g0n7ly avatar Jul 28 '23 14:07 dr4g0n7ly

Hi @dr4g0n7ly, are you still seeing this error? If so, could you tell us which platform you use (Windows, Linux, or macOS)?

jafermarq avatar Sep 20 '23 20:09 jafermarq

> Hi @dr4g0n7ly , are you still seeing this error? if so, could you tell us what platform do you use (windows, linux, mac)

Looks like Windows. I think you should check the printed output of your server.py, @dr4g0n7ly.

helin0815 avatar Jan 09 '24 07:01 helin0815