function [gradients,loss]= ModelD(k,M,Validate_train,params,NN_layer)
%MODELD Attention-weighted reconstruction loss and its gradients for one random sample.
%
%   [gradients,loss] = ModelD(k,M,Validate_train,params,NN_layer)
%
%   Inputs
%     k              - number of rows built into the window matrix (one row
%                      per outer-loop iteration below).
%     M              - number of leading target rows kept; the sample index
%                      is drawn uniformly from 1..M.
%     Validate_train - data array. It is flattened to 2-D, its rows are
%                      shuffled, columns 1:3 are taken as inputs and columns
%                      4:end as targets.
%     params         - struct read at params.attention.weight,
%                      params.attention.output_weight and
%                      params.attention.bias.
%     NN_layer       - number of elements per window and number of
%                      attention scores computed.
%
%   Outputs
%     gradients      - dlgradient(loss,params).
%     loss           - mse between the attention-weighted window and the
%                      window itself.
%
%   NOTE(review): dlgradient is called on params, so this function is
%   presumably meant to be evaluated through dlfeval with dlarray
%   parameters - confirm against the caller.
%   NOTE(review): the sample index is drawn from 1..M but indexes a matrix
%   with k rows, so M > k can produce an out-of-range index - confirm that
%   callers keep M <= k.

% Drop dimension labels and singleton dimensions so the parameters can be
% indexed element by element.
Attentionweight = stripdims(squeeze(params.attention.weight));
weight_out = stripdims(squeeze(params.attention.output_weight));
bias = stripdims(squeeze(params.attention.bias));

% Flatten to 2-D and shuffle the rows.
Validate_train = Validate_train(:,:);
validate_data_in = Validate_train(randperm(size(Validate_train,1)),:);
Validate_train_x = validate_data_in(:,1:3);
Validate_train_y = validate_data_in(:,4:end);

% Append a column of zeros to the targets.
Validate_train_y = [Validate_train_y, zeros(size(Validate_train_y,1),1)];

% Build the k-by-NN_layer window matrix. Validate_train_x is addressed with
% linear (column-major) indices: each row stores the first NN_layer
% elements, then those elements are overwritten by the ones three positions
% further on. Elements beyond NN_layer are never modified, so the shift
% always reads values that have not been overwritten in the same pass.
Validate_data_x = [];
for i = 1:k
    Validate_data_x(i,1:NN_layer) = Validate_train_x(1:NN_layer);
    Validate_train_x(1:NN_layer) = Validate_train_x(4:NN_layer+3);
end

% Pick one sample at random.
y_in = Validate_train_y(1:M,:);
Index = randi([1,M],1,1);
X_in = Validate_data_x(Index,:);

% The target row is reused cyclically across the NN_layer positions. This
% replaces an 11-by-11 repmat of which only the first row was read and
% which limited NN_layer to 11 times the row length; values are identical
% wherever the old tiling was in range.
target_row = y_in(Index,:);
num_targets = numel(target_row);

% One attention score per position: tanh(w(i)*x(i) + w_out*y(i) + b(i)).
for i = 1:NN_layer
    h = X_in(i);
    ht = target_row(mod(i-1,num_targets)+1);
    A = (Attentionweight(i)).*h;
    B = (weight_out)*ht;
    C = (bias(i));
    score(i) = tanh(A + B + C);
end

% Column of scores labelled 'CB', normalised with softmax.
score = score';
score = dlarray(score,'CB');
a = softmax(score);

% Weight every window element by its attention coefficient; the results
% are concatenated horizontally.
Vt = [];
for i = 1:NN_layer
    AA = a(i)*X_in(i);
    Vt = [Vt AA];
end
Vt = dlarray(Vt,'CB');

% Loss between the weighted window and the original window, then its
% gradients with respect to params.
loss = mse(Vt,X_in);
gradients = dlgradient(loss,params);

end