1.1 使用 R 时 reticulate 拥有的互操作性

Type conversions
| R | Python | 示例 |
|---|---|---|
| 单元素向量 | 标量 Scalar | `1`、`1L`、`TRUE`、`"foo"` |
| 未命名列表或多元素向量 | List | `c(1.0, 2.0, 3.0)`、`c(1L, 2L, 3L)` |
| 命名列表 | Dict | `list(a = 1L, b = 2.0)`、`dict(x = x_data)` |
| Matrix/Array | NumPy ndarray | `matrix(c(1,2,3,4), nrow = 2, ncol = 2)` |
| Data Frame | Pandas DataFrame | `data.frame(x = c(1,2,3), y = c("a", "b", "c"))` |
| Function | Python function | `function(x) x + 1` |
| NULL, TRUE, FALSE | None, True, False | `NULL`、`TRUE`、`FALSE` |

1.1.1 reticulate 包

Code
library(reticulate)
# devtools::install_version("ggmap", version = "3.5.2")

# Install tidyverse only when it is missing, then attach it with library().
# requireNamespace() only tests for availability; library() stops with an
# error if the package still cannot be loaded, whereas require() would just
# return FALSE.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  utils::install.packages("tidyverse")
}
library(tidyverse)

1.1.2 R 安装 python 模块

reticulate:安装Python module

Code
# Install pip into the Python environment that reticulate is using.
reticulate::py_install("pip")
# Check, module by module, whether each one is already installed.
reticulate::py_module_available("pip")
reticulate::py_module_available("numpy")
reticulate::py_module_available("pandas")

1.1.3 R 调用 Python 模块

Code
# Call the listdir() function of Python's os module.
# import() binds the Python module to an R object; its members are then
# reached with `$`. The recorded result is printed as an R character vector.
os <- reticulate::import("os")
os$listdir("./")
#>  [1] "03dictionary_cache"            "class_cache"                  
#>  [3] "control_structure_cache"       ".quarto"                      
#>  [5] "01basic_data_type"             "01basic_data_type_cache"      
#>  [7] "02list_cache"                  "preface_cache"                
#>  [9] "R-Python_cache"                "summary_cache"                
#> [11] "function_cache"                "index_cache"                  
#> [13] "function.qmd"                  "index.qmd"                    
#> [15] "preface.qmd"                   "references.bib"               
#> [17] "references.qmd"                "R-Python.qmd"                 
#> [19] "summary.qmd"                   "_quarto.yml"                  
#> [21] "02list.qmd"                    "03dictionary.qmd"             
#> [23] "class.qmd"                     "control_structure.qmd"        
#> [25] ".Rproj.user"                   "images"                       
#> [27] "data"                          "file_RW.qmd"                  
#> [29] ".gitignore"                    "visualization.qmd"            
#> [31] "01basic_data_type.qmd"         "PythonDataScience_notes.Rproj"
#> [33] "docs"                          "index.html"                   
#> [35] "site_libs"                     "R-Python.rmarkdown"

# Call the load_dataset() function of the seaborn module.
# seaborn must be installed in the active Python environment. Without it,
# import() fails with "No module named 'seaborn'" and the two statements
# after it fail in turn because `sns` and `tips` are never created, so the
# module is installed on demand before it is imported.
if (!reticulate::py_module_available("seaborn")) {
  reticulate::py_install("seaborn")
}
sns <- reticulate::import("seaborn")
tips <- sns$load_dataset("tips")
print(head(tips))

1.1.4 R → Python

Code
# R objects of different kinds. The Python chunk that follows reads each of
# them by name (r.A, r.B, ...), so the names must stay as they are.
A <- 1                                      # length-one numeric vector
B <- c(1, 2, 3)                             # unnamed numeric vector
C <- c(a = 1, b = 2, c = 3)                 # named numeric vector
D <- matrix(1:4, nrow = 2)                  # 2 x 2 integer matrix
E <- data.frame(a = c(1, 2), b = c(3, 4))   # data frame with two columns
G <- list(1, 2, 3)                          # unnamed list of scalars
H <- list(c(1, 2), c(3, 4))                 # unnamed list of vectors
I <- list(a = c(1, 2), b = c(3, 4))         # named list of vectors
# Two-argument function; the value of the last expression is returned.
J <- function(a, b) {
  a + b
}
K1 <- NULL
# Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
K2 <- TRUE
K3 <- FALSE
Code
# Objects defined in the R session are read from Python as `r.<name>`.

### float
# A is the length-one R numeric vector `1`; it shows up as a Python float.
r.A
###> 1.0
type(r.A)
###> <class 'float'>

### list
# B is a multi-element R numeric vector; it shows up as a list of floats.
r.B
###> [1.0, 2.0, 3.0]
type(r.B)
###> <class 'list'>

# C is a named R vector; the names a, b, c do not appear in the result.
r.C
###> [1.0, 2.0, 3.0]
type(r.C)
###> <class 'list'>

### numpy.ndarray
# D is an R matrix, which is meant to arrive as a NumPy ndarray. The output
# recorded below is an error instead, because no usable NumPy installation
# was found in the Python environment of this run.
r.D
###> RuntimeError: Required version of NumPy not available: installation of Numpy >= 1.6 not found
type(r.D)
###> RuntimeError: Required version of NumPy not available: installation of Numpy >= 1.6 not found

### pandas.core.frame.DataFrame
# E is an R data frame, which is meant to arrive as a pandas DataFrame. In
# this recorded run it arrived as a dict of column lists instead.
# NOTE(review): presumably because pandas was not installed (the pandas
# import later in this document fails with ModuleNotFoundError) - confirm.
r.E
###> {'a': [1.0, 2.0], 'b': [3.0, 4.0]}
type(r.E)
###> <class 'dict'>

### list
# G is an unnamed R list of three scalars; it shows up as a flat list.
r.G
###> [1.0, 2.0, 3.0]
type(r.G)
###> <class 'list'>

# H is an unnamed R list of two vectors; it shows up as a list of lists.
r.H
###> [[1.0, 2.0], [3.0, 4.0]]
type(r.H)
###> <class 'list'>

### dict
# I is a named R list; the element names become the dict keys.
r.I
###> {'a': [1.0, 2.0], 'b': [3.0, 4.0]}
type(r.I)
###> <class 'dict'>

### function
# J is an R function; it shows up as a Python function object that can be
# called with Python arguments.
r.J
###> <function make_python_function.<locals>.python_function at 0x000002001AE0A840>
type(r.J)
###> <class 'function'>
r.J(2, 3)
###> 5


### NoneType
# K1 is R's NULL; it shows up as None. No output is recorded for the bare
# expression on the next line.
r.K1
type(r.K1)
###> <class 'NoneType'>


### bool
# K2 and K3 are the R logical values T and F; they show up as True and False.
r.K2
###> True
type(r.K2)
###> <class 'bool'>
r.K3
###> False
type(r.K3)
###> <class 'bool'>

1.1.5 Python → R

Code
# Python objects of different kinds. The R chunk that follows reads each of
# them by name (py$m, py$A, ...), so the names must stay as they are.

# pandas is needed for the DataFrame `n`. In the recorded run the import
# failed, so the statement creating `n` failed as well.
import pandas as pd
###> ModuleNotFoundError: No module named 'pandas'

m = [1, 2, 3]
n = pd.DataFrame(
    [[1, 2], [3, 4]],
    columns=["a", "b"],
)
###> NameError: name 'pd' is not defined

# Scalar and lists (flat, nested, and mixed nesting).
A = 1
B = [1, 2, 3]
C = [[1, 2], [3, 4]]
D1 = [[1], 2, 3]
D2 = [[1, 2], 2, 3]

# Tuples (flat, nested, and mixed nesting).
E = (1, 2, 3)
FF = ((1, 2), (3, 4))
G = ((1, 2), 3, 4)

# Dicts: values of equal length, then a scalar next to a list.
H = {"a": [1, 2, 3], "b": [2, 3, 4]}
I = {"a": 1, "b": [2, 3, 4]}


def J(a, b):
    """Return a + b."""
    return a + b
Code
# Objects defined in the Python session are read from R as `py$<name>`.
# m is the Python list [1, 2, 3].
py$m
#> [1] 1 2 3
# n was never created on the Python side in this recorded run (the statement
# that builds it failed), hence the error below.
py$n
#> module '__main__' has no attribute 'n'

### integer
# A is the Python int 1; it shows up as an R integer.
py$A
#> [1] 1
class(py$A)
#> [1] "integer"

# B is the flat Python list [1, 2, 3]; it shows up as one integer vector.
py$B
#> [1] 1 2 3
class(py$B)
#> [1] "integer"

### list
# C is the nested Python list [[1, 2], [3, 4]]; it shows up as an R list
# with one vector per inner list.
py$C
#> [[1]]
#> [1] 1 2
#> 
#> [[2]]
#> [1] 3 4
class(py$C)
#> [1] "list"

# D1 is [[1], 2, 3], a list mixing a nested list with scalars; it shows up
# as an R list with one element per item.
py$D1
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 3
class(py$D1)
#> [1] "list"

# D2 is [[1, 2], 2, 3]; the inner list becomes a vector inside the R list.
py$D2
#> [[1]]
#> [1] 1 2
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 3
class(py$D2)
#> [1] "list"

# E is the tuple (1, 2, 3). Unlike the flat list B, it shows up as an R list
# with one element per item rather than as a single vector.
py$E
#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 3
class(py$E)
#> [1] "list"

# FF is the tuple of tuples ((1, 2), (3, 4)); it shows up as a list of
# lists, each inner tuple keeping one element per item.
py$FF
#> [[1]]
#> [[1]][[1]]
#> [1] 1
#> 
#> [[1]][[2]]
#> [1] 2
#> 
#> 
#> [[2]]
#> [[2]][[1]]
#> [1] 3
#> 
#> [[2]][[2]]
#> [1] 4
class(py$FF)
#> [1] "list"

# G is ((1, 2), 3, 4), a tuple mixing a nested tuple with scalars.
py$G
#> [[1]]
#> [[1]][[1]]
#> [1] 1
#> 
#> [[1]][[2]]
#> [1] 2
#> 
#> 
#> [[2]]
#> [1] 3
#> 
#> [[3]]
#> [1] 4
class(py$G)
#> [1] "list"

# H is a dict whose values are lists; it shows up as a named R list whose
# element names are the dict keys.
py$H
#> $a
#> [1] 1 2 3
#> 
#> $b
#> [1] 2 3 4
class(py$H)
#> [1] "list"

# I is a dict mixing a scalar value with a list value.
py$I
#> $a
#> [1] 1
#> 
#> $b
#> [1] 2 3 4
class(py$I)
#> [1] "list"

### function
# J is a function defined in Python; in R it is an object of class
# "python.builtin.function" that can be called with R arguments.
py$J
#> <function J at 0x000001C6CD5600E0>
class(py$J)
#> [1] "python.builtin.function" "python.builtin.object"
py$J(2, 3)
#> [1] 5

1.2 编写 Python 时 rpy2 拥有的互操作性

Code
# Check whether the rpy2 module is installed in the active Python
# environment; in this recorded run it is not.
py_module_available("rpy2")
#> [1] FALSE

……