Miniconda env set up error
Describe the bug
I used the exact same code as the PyTorch Quickstart tutorial at https://flower.dev/docs/framework/quickstart-pytorch.html, but when I run the client I get an error saying: DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error
Steps/Code to Reproduce
client.py
from collections import OrderedDict
import torch import torch.nn as nn import torch.nn.functional as F import torchvision.transforms as transforms from torch.utils.data import DataLoader from torchvision.datasets import CIFAR10
import flwr as fl
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Load CIFAR-10
def load_data():
    """Build the CIFAR-10 train/test loaders and report the size of each split.

    Both splits are stored in (and, if missing, downloaded to) the current
    directory and share one transform: conversion to a tensor followed by
    per-channel normalization with mean 0.5 and std 0.5.

    Returns:
        A ``(trainloader, testloader, num_examples)`` tuple, where
        ``num_examples`` maps ``"trainset"`` and ``"testset"`` to the number
        of samples in the corresponding split.
    """
    half = (0.5, 0.5, 0.5)
    pipeline = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(half, half)]
    )

    train_split = CIFAR10(".", train=True, download=True, transform=pipeline)
    test_split = CIFAR10(".", train=False, download=True, transform=pipeline)

    # Only the training loader shuffles; both use batches of 32.
    trainloader = DataLoader(train_split, batch_size=32, shuffle=True)
    testloader = DataLoader(test_split, batch_size=32)

    num_examples = {"trainset": len(train_split), "testset": len(test_split)}
    return trainloader, testloader, num_examples
Train network on training set
def train(net, trainloader, epochs):
    """Train ``net`` in place with SGD for ``epochs`` passes over ``trainloader``.

    Uses cross-entropy loss and SGD (lr=0.001, momentum=0.9). Each batch is
    moved to the module-level ``DEVICE`` before the forward pass.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for _ in range(epochs):
        for batch_images, batch_labels in trainloader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)

            sgd.zero_grad()
            predictions = net(batch_images)
            batch_loss = loss_fn(predictions, batch_labels)
            batch_loss.backward()
            sgd.step()
Validate the network on the entire test set
def test(net, testloader):
    """Evaluate ``net`` on every batch of ``testloader`` without gradients.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the sum of the per-batch
        cross-entropy values (it is not averaged); ``accuracy`` is the
        fraction of samples whose highest-scoring class equals the label.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    hits = 0
    seen = 0
    summed_loss = 0.0

    with torch.no_grad():
        for batch in testloader:
            images = batch[0].to(DEVICE)
            labels = batch[1].to(DEVICE)

            logits = net(images)
            summed_loss += loss_fn(logits, labels).item()

            # torch.max over dim 1 yields (values, indices); the indices are
            # the predicted class ids.
            predicted = torch.max(logits.data, 1)[1]
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    return summed_loss, hits / seen
model
class Net(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The final layer produces 10 outputs per sample. The flatten step in
    ``forward`` assumes each sample has 16 * 5 * 5 features after the second
    pooling stage, which is what 3x32x32 inputs produce.
    """

    def __init__(self) -> None:
        # The dunder name matters: a method called plain ``init`` is never
        # invoked on construction, so no layers would be registered.
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 10 raw class scores for each sample in ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps before the fully connected layers.
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
Load model and data
# Build the model on the selected device.
net = Net().to(DEVICE)

# Data loaders plus the per-split sample counts reported back to the server.
trainloader, testloader, num_examples = load_data()
class CifarClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates the module-level ``net``."""

    def get_parameters(self, config):
        """Return every tensor in ``net.state_dict()`` as a CPU NumPy array."""
        return [tensor.cpu().numpy() for tensor in net.state_dict().values()]

    def set_parameters(self, parameters):
        """Load ``parameters`` into ``net``.

        Values are paired with the state-dict keys in order, so
        ``parameters`` must follow the ordering of ``get_parameters``.
        """
        new_state = OrderedDict()
        for key, array in zip(net.state_dict().keys(), parameters):
            new_state[key] = torch.tensor(array)
        net.load_state_dict(new_state, strict=True)

    def fit(self, parameters, config):
        """Load the given weights, train for one epoch, return the new weights.

        Returns:
            ``(parameters, number_of_training_samples, {})``.
        """
        self.set_parameters(parameters)
        train(net, trainloader, epochs=1)
        updated = self.get_parameters(config={})
        return updated, num_examples["trainset"], {}

    def evaluate(self, parameters, config):
        """Load the given weights and evaluate them on the test loader.

        Returns:
            ``(loss, number_of_test_samples, {"accuracy": accuracy})``.
        """
        self.set_parameters(parameters)
        loss, accuracy = test(net, testloader)
        metrics = {"accuracy": float(accuracy)}
        return float(loss), num_examples["testset"], metrics
# Connect this client to the Flower server and block while it participates.
# NOTE(review): "127:0:01:8080" is not a valid host:port string -- it uses
# colons where an IPv4 address needs dots. This is the exact value gRPC
# reports it cannot resolve in the traceback below. Presumably
# "127.0.0.1:8080" was intended (server.py listens on port 8080) -- confirm
# with the reporter. Left unchanged here because it is the reproduction.
fl.client.start_numpy_client( server_address="127:0:01:8080", client=CifarClient() )
-------------
server.py
import flwr as fl
# Federated averaging with the library's default settings.
strategy = fl.server.strategy.FedAvg()

# Listen on all interfaces, port 8080, and run three rounds.
fl.server.start_server(
    server_address="0.0.0.0:8080",
    config=fl.server.ServerConfig(num_rounds=3),
    strategy=strategy,
)
Expected Results
Once I run the server.py and then the client.py, as per my understanding the model should start training at the client
Actual Results
I get the following error
Files already downloaded and verified
Files already downloaded and verified
INFO flwr 2023-07-28 19:47:54,862 | grpc.py:50 | Opened insecure gRPC connection (no certificates were passed)
DEBUG flwr 2023-07-28 19:47:54,866 | connection.py:39 | ChannelConnectivity.IDLE
DEBUG flwr 2023-07-28 19:47:54,886 | connection.py:39 | ChannelConnectivity.TRANSIENT_FAILURE
DEBUG flwr 2023-07-28 19:47:55,091 | connection.py:113 | gRPC channel closed
Traceback (most recent call last):
File "C:\Users\nisha\Documents\GitHub\NLP_project\flower-cifar-10\client.py", line 104, in <module>
fl.client.start_numpy_client(server_address="127:0:01:8080", client=CifarClient())
File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\app.py", line 252, in start_numpy_client
start_client(
File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\app.py", line 174, in start_client
server_message = receive()
File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\flwr\client\grpc_client\connection.py", line 105, in <lambda>
receive: Callable[[], ServerMessage] = lambda: next(server_message_iterator)
File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\grpc\_channel.py", line 475, in __next__
return self._next()
File "C:\Users\nisha\miniconda3\envs\snakes\lib\site-packages\grpc\_channel.py", line 864, in _next
raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error"
debug_error_string = "UNKNOWN:DNS resolution failed for 127:0:01:8080: UNAVAILABLE: WSA Error {created_time:"2023-07-28T14:17:54.8746447+00:00", grpc_status:14}"
>```
Hi @dr4g0n7ly, are you still seeing this error? If so, could you tell us which platform you are using (Windows, Linux, or macOS)?
Hi @dr4g0n7ly, are you still seeing this error? If so, could you tell us which platform you are using (Windows, Linux, or macOS)?
Looks like Windows. I think you should check the printed output of your server.py, @dr4g0n7ly.