# Python numpy 小测试:均值标准化和数据分离 (十七)

TODO：

# import NumPy into Python
import numpy as np

# Build a 1000-row by 20-column ndarray of random integers taken from the
# half-open interval [0, 5001), i.e. every entry lies between 0 and 5000.
# Signature: numpy.random.randint(low, high=None, size=None, dtype='l')
X = np.random.randint(low=0, high=5001, size=(1000, 20))

# Confirm the dimensions of X
print(X.shape)

(1000, 20)

$\mbox{Norm_Col}_i = \frac{\mbox{Col}_i - \mu_i}{\sigma_i}$

# Mean of each column of X (reducing along axis 0 leaves one value per column)
ave_cols = np.mean(X, axis=0)

# Standard deviation of each column of X, reduced along the same axis
std_cols = np.std(X, axis=0)

# Show the shape of ave_cols, then the shape of std_cols, one per line
print(ave_cols.shape, std_cols.shape, sep="\n")
(20,)
(20,)

# Mean normalize X: subtract each column's mean, then divide by that column's
# standard deviation. ave_cols and std_cols hold one value per column and are
# broadcast across the rows of X.
X_norm = np.divide(X - ave_cols, std_cols)
print(X_norm)
[[-1.09244009  1.20786002  0.76287604 ...,  0.64526733 -1.41998318
1.17164336]
[ 0.84061898  0.11577736  0.53107517 ...,  0.30592195  1.40700164
-0.74825693]
[ 0.69818305  0.57847736  0.71720781 ..., -1.68652026 -0.8152398
0.0664558 ]
...,
[ 0.34875894 -0.90714237 -1.55374878 ..., -0.66917664 -0.20564739
-1.45162098]
[-1.2025999   0.84198812  1.07494229 ..., -1.66920672 -1.12280688
-0.81050154]
[ 0.5543438  -0.34208123 -1.3620806  ..., -1.44066799  0.23907799
-1.45092937]]

# Mean taken over every entry of X_norm
print(np.mean(X_norm))

# Smallest entry found in each column of X_norm
print(np.min(X_norm, axis=0))

# Largest entry found in each column of X_norm
print(np.max(X_norm, axis=0))
3.90798504668e-18
[-1.72112282 -1.72810634 -1.75510417 -1.73180046 -1.70567088 -1.75546874
-1.75310448 -1.67458958 -1.73767477 -1.69207709 -1.68425496 -1.77073212
-1.69909456 -1.75145458 -1.65293994 -1.70000118 -1.68762343 -1.68652026
-1.70746142 -1.73379653]
[ 1.77802983  1.72104895  1.70045867  1.75054243  1.78609535  1.7104575
1.78341753  1.76788895  1.72197691  1.73202726  1.75850722  1.75033426
1.77507248  1.70611506  1.77887509  1.7676166   1.72273456  1.7748027
1.75197552  1.71732107]

1. 训练集
2. 交叉验证集
3. 测试集

# Demonstration: np.random.permutation(5) returns the integers 0 through 4 in
# a random order. The result is not assigned to a name here, so it is only
# visible when an interactive session echoes the value of the expression.
np.random.permutation(5)
array([4, 3, 2, 1, 0])

TODO

# Shuffle the row indices of X_norm: the result is a 1-D ndarray in which each
# index from 0 up to (number of rows - 1) appears exactly once, in random order
row_indices = np.random.permutation(len(X_norm))
print(row_indices)
[530 679 250 649 202 527 682 949 842 881 492 699 632 835 713 897 210 257
538 558 578 124 989 891 404 182 219 268  78 651 197 355 818 624 609 254
564 768 581 893 741 864 161 328 321 878 231 361 496 147 409 695 267 610
614  52 358 498 307 980 518 479 541 179   6 345 805 526 240  85 489 226
336 770 617 573 340 819  45 326 142 915  91 569 921 439 999 509 674 505
929 484 750 661 174 352 707 575 583 930 613 430 169 333 303  56  13  51
554 400 625 105 686 136 593 664  34 867 180 415 923 337 368 760 884 706
252 673 410 888 418 535 467 716 435 608 965 408 633 168 874 870 461 759
184 576 623 722 844 663 605 401 887 979 104 683 388 802 150 454 140 414
931 906 175  82 351 698 851 832 718 764  39 742  77 487 520 382 546 285
781 503 705 725 803 292 570 766 289 745 288 577 671 207 841 847 282 667
325 531  18 843 737 259 757 916 753 957  17 974 588 411  19   1 919 762
40 937 260 291 265 459 810 629 701 171 712 774 752 971 274 442 655 245
592 536 869  70 700 596 122 488  48  60 880 471 543 349 947 183 675 112
582 115   3 668 200 909 448 814 364  10 338 696  74 273 374 922 493  96
542  42 788 853 830 680 211  32 904 907  93 598  31 346 263 305  98  65
604 872 836  12  83 628 627 378 348 331 302  69 146  73 306 777 453  76
761 643 652 158 812 779 821 963 775 334 902 561 726 466 396  92 665 983
826 206 678 417 464  64 397 217 478 490 507 905 297  59 405 723  37 772
139 936 253 984 703 129 375   4 380 165 729 463 116 982 602 152 377 132
248 137 143 425 831 446 630 839 135 603 615 403 121 955 744 545 589 871
234  90 758 813   7 166 857 271 634 890 236 861 877  26  58  75 519 892
424 407 647 786 251 106 362 223 350 903 549 393 738 773 782 657 754 188
392 185 616 650 172 269 243 384 485  66 279  62 228 315 555 889 981 833
255 160 935 310 811 765 480 552 433 262 914  79 164 693 800 736 763 823
817  35 571 621 514 943 568 798 797 739  24 390 324 294 399 908 208 563
895 117 125 344 449 689 456 304 477 529  28 296 865 500 511 584 804 196
43 422 502 917 956 946 214 925 550 134 825 708 110 994 357 975 123 720
222 920 286 879 783 828 317 419 913 434 327 850 157 365 952 229 416 181
472 437 198 443  55  20 684 145 749 962 339 221 677 423 506 293 721 653
309 468 369 619 421 342 389 235 704 547 524 845 816 383 648 941 429 203
100 371 790 385 178 660 515  63 224 335 497 264 912 740  94 815 491 719
379 256 232   9 595 475 755  15 438  84 281 215 987 928 441 144 944 516
854 809 343 451 658 154  25 953 622 278  49 959 330 533 452 201 318 641
314 612  97 868 218 567 126 562 687 191  57 299 458  50 958 394 990 715
186 697   5 261 690 606 635 927 730 148 978 585 213 525 746 537 838 767
991 977 114 470 747 934 225 220  27 780 457 402 940 499 482 194 794 948
130 939 829 645 778  14 611 985 242 131 732 153 970 852 187 426 193 363
353 495 301 494 367 574 796 532 734 356 685 860 162 996 120 319 395 656
528  11 372 523 636 308 785  53 156 876 227 295  22 177 238 899 717   8
2 341 359 107 539 398  81  88  71 276 969 938 287 553 534 731  16 113
637 960 856 756 354 709 246 597 901 444 599 964 128 834 669 460 320 866
670 329 875 642 280 199 420 988 230 862 283 885 565  23 882 101 626  86
993 820 784 950 728 590 127 548 360 918 954 792 102 735 557 387 711 639
579  47  38  41 694 587 910 173 391 376 837 373 654  29 381 681 666 247
239 951 997 313 859 972 277  44 676  36 450 540 412 556 233 209  61 848
447 933 512 522 791 141 469 508 801 714 486 510 807 151 216 638 481 192
771 432 572 195 644 244 691  33 743 566 436 163 769  30 560 204 133 883
386 733 618 799 476 205 109 440 898 822 995 776  67 966  46 513 873 967
445 976 789 406 473 992  72 551 428 501  87 727 462 483 600 149 945 427
710 808 521 108 316 886 586 631 258 559 266 176  80 591 620  21 640 167
968 646 827 290 284 849 702 275 322 942 840 748 601  89 659 688 863 795
272 961 580 986 311 806 932 504 237 249 900 431 894 190 793 855 998 926
824  95 138 103 751 212 924 724 474 544 787 298 370 896 155 607 323 118
846 170   0 111 594 119 159  99 692  54 366 911 413 858 332 189 455 312
517 662 270 241 672 973 465 300  68 347]

# Split the rows of X_norm into three disjoint sets, using the shuffled
# row_indices so that each set is a random sample of the rows.
# NOTE(review): the split ratio is not stated in this file; a 60% / 20% / 20%
# (training / cross validation / test) split is assumed -- adjust the two
# boundaries below if the exercise specifies different proportions.
num_rows = row_indices.shape[0]

# Integer arithmetic keeps the boundaries exact (no float rounding).
train_end = num_rows * 6 // 10      # first 60% of the shuffled indices
cross_val_end = num_rows * 8 // 10  # next 20%; whatever remains is the test set

# Create a Training Set
X_train = X_norm[row_indices[:train_end], :]

# Create a Cross Validation Set
X_crossVal = X_norm[row_indices[train_end:cross_val_end], :]

# Create a Test Set
X_test = X_norm[row_indices[cross_val_end:], :]

# Print the shape of X_train
print(X_train.shape)

# Print the shape of X_crossVal
print(X_crossVal.shape)

# Print the shape of X_test
print(X_test.shape)