main 的所有命令行参数
All command line arguments to main
我想知道 main 函数的所有命令行参数是什么(特别是在 C 中,但我猜这对所有语言都适用)?在我的编译器课上,我听到一位讲师简短地提到(可能我听错了或误解了)main() 的参数比通常提到的要多,特别是在 argv 指针的负偏移处可以访问到一些信息。我在谷歌上搜索过,也翻过手头的几本教科书,都没有找到相关内容。我用 C 写了这个小程序来试一试。以下是一些问题:
1) While 循环在段错误之前运行了 32 次。为什么总共有32个参数,我在哪里可以找到它们的规格,为什么有32个而不是另一个数量?
打印出来的信息都是关于系统的:当前工作目录(pwd)、终端(term)会话信息、用户信息等等。
2) 有没有在 main 之前入栈的东西?在典型的调用过程中,函数的参数在返回地址之前被压入堆栈(先不考虑金丝雀(canary)之类的其他内容)。当一个程序被 shell 调用时,过程是否相同,我在哪里可以读到这方面的信息?我真的很想知道 shell 是如何调用程序的,以及与程序内部的堆栈布局相比,此时的内存布局是什么样的。
#include <stdio.h>
#include <ctype.h>
/*
 * Experiment: print argc, then print the 32 pointer slots starting at
 * argv[0] as C strings, then print five characters derived from argv.
 */
int main(int argc, char * argv[]) {
void * argall = argv[0];
printf("argc=%d\n", argc);
int i = 0;
// The bound is a fixed 32, not argc: every index above argc reads past the
// end of the argv array that the C standard describes, so what is found
// there (and whether the read faults) depends on the platform.
while (i < 32) {
//while (argall) { // tried this to find out that it seg faults at i=32
// argall can be a null pointer here (argv[argc] is one); the output posted
// with this program shows the C library printing "(null)" in that case.
printf("arg%d %s\n", i, (char* ) argall);
i++;
argall = argv[i];
}
printf("negative pointers\n");
// I don't think dereferencing in this part is quite right, but I am
// getting chars since I am reading bytes. Output of below code is.
// How come it is alphabet?
// I tried reading int values and (char*) for string, but got nothing useful.
/*
arg -1 o
arg -2 n
arg -3 m
arg -4 l
arg -5 k
*/
// The cast binds tighter than the subtraction, so each argument below is
// ((char) argv) - N: the pointer value converted to char, minus N. Nothing
// is dereferenced, which is why the output is a run of consecutive letters.
printf("arg -1 %c\n", (char) argv-1);
printf("arg -2 %c\n", (char) argv-2);
printf("arg -3 %c\n", (char) argv-3);
printf("arg -4 %c\n", (char) argv-4);
printf("arg -5 %c\n", (char) argv-5);
return 0;
}
非常感谢!很抱歉帖子这么长。
更新:这是 while 循环的输出:
argc=1
arg0 ./main-testing.o
arg1 (null)
arg2 TERM_PROGRAM=iTerm.app
arg3 SHELL=/bin/bash
arg4 TERM=xterm-256color
arg5 CLICOLOR=1
arg6 TMPDIR=/var/folders/d0/<redacted>
arg7 Apple_PubSub_Socket_Render=/private/<redacted>
arg8 OLDPWD=/Users/me/problems
arg9 USER=me
arg10 COMMAND_MODE=unix2003
arg11 SSH_AUTH_SOCK=/private/t<redacted>
arg12 _<redacted>
arg13 LSCOLORS=ExFxBxDxCxegedabagacad
arg14 PATH=/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin
arg15 PWD=/Users/me/problems/c
arg16 LANG=en_CA.UTF-8
arg17 ITERM_PROFILE=Default
arg18 XPC_FLAGS=0x0
arg19 PS1=\[3[36m\]\u\[3[m\]@\[3[32m\]\h:\[3[33;1m\]\w\[3[m\]$
arg20 XPC_SERVICE_NAME=0
arg21 SHLVL=1
arg22 COLORFGBG=7;0
arg23 HOME=/Users/me
arg24 ITERM_SESSION_ID=w0t0p0
arg25 LOGNAME=me
arg26 _=./main-testing.o
arg27 (null)
arg28 executable_path=./main-testing.o
arg29
arg30
arg31
在 Linux、*BSD(因此也包括 Mac OS X)以及可能的其他类 Unix 系统上,environ
数组在堆栈上紧接着 argv
数组之后构建。
environ
是一个字符串数组,包含所有环境变量,其中每个字符串的形式为 name=value
。虽然通常通过 getenv
函数访问各个环境变量,但也允许直接使用 environ
全局变量(由 Posix 规定)。
通过遍历 main
调用帧下方的堆栈来查找这些字符串是不正确的,而且与直接使用 environ
相比也没有任何优势。
如果您想查看实际代码,则需要深入研究 execve
系统调用的实现,这才是真正启动新进程的地方。lwn.org 上有一篇关于 Linux 进程启动的讨论,看起来相当准确,其中还包含指向代码仓库的链接。FreeBSD 的实现在许多方面与之类似,位于 /sys/kern/kern_exec.c
;您可以从这里开始阅读。
您似乎在使用 Mac。在 Mac 上,main() 会收到 4 项数据。
您可以为 main()
使用替代声明:
int main(int argcv, char **argv, char **envp)
然后您将能够列出环境,就像您通过访问超出参数列表末尾所做的那样。环境跟随参数,也以空指针终止。
然后 Mac 在环境之后还有更多数据(您可以在输出中看到 executable_path=…
)。您可以在维基百科的 Entry Point 条目下找到一些相关信息,其中提到了 char *apple[]
参数向量(Argument Vector):
int main(int argc, char **argv, char **envp, char **applev)
据我所知,argv
向量之前的内容没有任何标准化规定。将它们作为单个字符访问不太可能有用。我会把这些数据按地址打印出来并寻找规律。
这是我几年前写的一些代码,试图从 environ
出发找到参数列表;它会一直有效,直到您通过添加新变量来修改环境,因为这会改变 environ
指向的位置:
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h> /* putenv(), setenv() */
extern char **environ; /* Should be declared in <unistd.h> */
/*
** The object of the exercise is: given just environ (since that is all
** that is available to a library function) attempt to find argv[0] (and
** hence argc).
**
** On some platforms, the layout of memory is such that the number of
** arguments (argc) is available, followed by the argument vector,
** followed by the environment vector.
**
** argv environ
** | |
** v v
** | argc | argv0 | argv1 | ... | argvN | 0 | env0 | env1 | ... | envN | 0 |
**
** This applies to:
** -- Solaris 10 (32-bit, 64-bit SPARC)
** -- MacOS X 10.6 (Snow Leopard, 32-bit and 64-bit)
** -- Linux (RHEL 5 on x86/64, 32-bit and 64-bit)
**
** Sadly, this is not quite what happens on the other two Unix
** platforms. The value preceding argv0 seems to be a 0.
** -- AIX 6.1 (32-bit, 64-bit)
** -- HP-UX 11.23 IA64 (32-bit, 64-bit)
** Sub-standard POSIX support (no setenv()) and C99 support (no %zd).
**
** NB: If putenv() or setenv() is called to add an environment variable,
** then the base address of environ changes radically, moving off the
** stack onto heap, and all bets are off. Modifying an existing
** variable is not a problem.
**
** Spotting the change from stack to heap is done by observing whether
** the address pointed to by environ is more than 128 K times the size
** of a pointer from the address of a local variable.
**
** This code is nominally incredibly machine-specific - but actually
** works remarkably portably.
*/
/* An argument vector together with the number of arguments it holds. */
struct Arguments
{
    char **argv;    /* first slot of the argument vector */
    size_t argc;    /* number of arguments in argv */
};
typedef struct Arguments Arguments;
/*
** Print one slot of a char * vector on stdout: the address of the slot,
** the pointer stored in it, and the string that pointer refers to
** ("<null>" when the stored pointer is null).
**
** tag - label printed in front of the index
** i   - index printed in brackets after the tag
** ptr - address of the slot to describe; it is dereferenced
*/
static void print_cpp(const char *tag, int i, char **ptr)
{
    char *stored = *ptr;
    const char *text = "<null>";

    if (stored != 0)
        text = stored;
    printf("%s[%d] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
           tag, i, (uintptr_t)ptr, (uintptr_t)stored, text);
}
/*
** Largest distance, in bytes, between environ and a local variable for
** which environ is still treated as living on the stack: 128 K pointers.
*/
enum
{
    MAX_DELTA = 128 * 1024 * sizeof(void *)
};
/*
** Try to recover the program's argument vector using only environ.
**
** When environ lies within MAX_DELTA bytes of a local variable, the slots
** below environ are scanned downwards, starting two slots below it.  The
** scan stops at the first slot whose value is zero or equals its distance
** from environ minus two - that is, a slot that looks like argc sitting
** just in front of argv[0].  Otherwise a placeholder vector
** { "<unknown>", 0 } with an argument count of 1 is reported.
**
** Every slot examined, the count found and the resulting vector are
** printed on stdout.
**
** Returns the argument vector found and the number of arguments in it.
*/
static Arguments find_argv0(void)
{
    static char *dummy[] = { "<unknown>", 0 };
    Arguments args;
    uintptr_t i;
    char **base = environ - 1;
    /* Byte distance between a local variable (&base) and environ. */
    uintptr_t delta = ((uintptr_t)&base > (uintptr_t)environ) ? (uintptr_t)&base - (uintptr_t)environ : (uintptr_t)environ - (uintptr_t)&base;

    if (delta < MAX_DELTA)
    {
        /* A slot i below environ that holds i - 2 is taken to be argc. */
        for (i = 2; (uintptr_t)(*(environ - i) + 2) != i && (uintptr_t)(*(environ - i)) != 0; i++)
            print_cpp("test", i, environ - i);
        args.argc = i - 2;
        args.argv = environ - i + 1;
    }
    else
    {
        args.argc = 1;
        args.argv = dummy;
    }

    /* args.argc is a size_t, so it is printed with %zu rather than %zd. */
    printf("argc = %zu\n", args.argc);
    /* <= so that the slot following the last argument is shown as well. */
    for (i = 0; i <= args.argc; i++)
        print_cpp("argv", i, &args.argv[i]);
    return args;
}
/*
** Print the command name and arguments as recovered by find_argv0().
**
** Slots 0 through argc inclusive are listed, so the slot following the
** last argument is shown too; a null pointer is shown as "<null>".
*/
static void print_arguments(void)
{
    Arguments args = find_argv0();

    printf("Command name and arguments\n");
    /* Both values below are size_t, hence %zu rather than %zd. */
    printf("argc = %zu\n", args.argc);
    for (size_t i = 0; i <= args.argc; i++)
    {
        printf("argv[%zu] = %s\n", i, (args.argv[i] ? args.argv[i] : "<null>"));
    }
}
/*
** Print how argv and environ relate to each other and dump the slots
** immediately below environ.
**
** argc, argv - the values received by main()
**
** Returns 0, after printing only the argv slots, when environ is more
** than argc + 1 slots away from argv (the environment vector no longer
** sits directly after the argument vector).  Returns 1 otherwise, after
** also printing the argc + 3 slots below environ.
*/
static int check_environ(int argc, char **argv)
{
    size_t n = argc;
    size_t i;
    /* Distance between the two vectors, counted in char * slots. */
    unsigned long delta = (argv > environ) ? argv - environ : environ - argv;

    printf("environ = 0x%lX; argv = 0x%lX (delta: 0x%lX)\n",
           (unsigned long)environ, (unsigned long)argv, delta);
    for (i = 0; i <= n; i++)
        print_cpp("chkv", i, &argv[i]);
    if (delta > (unsigned long)argc + 1)
        return 0;

    /* Slots environ[-1] down to environ[-(argc + 1)], shown as strings. */
    for (i = 1; i < n + 2; i++)
    {
        /* i is a size_t, hence %zu rather than %zd. */
        printf("chkr[%zu] = 0x%lX (0x%lX) (%s)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)),
               (*(environ - i) ? *(environ - i) : "<null>"));
        fflush(0);
    }

    /* The next two slots down are shown as raw values only. */
    for (i = n + 2; i <= n + 3; i++)
        printf("chkF[%zu] = 0x%lX (0x%lX)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)));
    return 1;
}
/*
** Driver: report the argv/environ layout three times - as the program
** starts, after setting SHELL with setenv() (overwrite enabled), and
** after adding CODSWALLOP with putenv().
*/
int main(int argc, char **argv)
{
    int layout_ok;

    printf("Before setting environment\n");
    layout_ok = check_environ(argc, argv);
    if (layout_ok != 0)
        print_arguments();

    setenv("SHELL", "/bin/csh", 1);
    printf("After modifying environment\n");
    layout_ok = check_environ(argc, argv);
    if (!layout_ok)
        printf("Modifying environment messed everything up\n");
    print_arguments();

    putenv("CODSWALLOP=nonsense");
    printf("After adding to environment\n");
    layout_ok = check_environ(argc, argv);
    if (!layout_ok)
        printf("Adding environment messed everything up\n");
    print_arguments();

    return 0;
}
我想知道 main 函数的所有命令行参数是什么(特别是在 C 中,但我猜这对所有语言都适用)?在我的编译器课上,我听到一位讲师简短地提到(可能我听错了或误解了)main() 的参数比通常提到的要多,特别是在 argv 指针的负偏移处可以访问到一些信息。我在谷歌上搜索过,也翻过手头的几本教科书,都没有找到相关内容。我用 C 写了这个小程序来试一试。以下是一些问题:
1) While 循环在段错误之前运行了 32 次。为什么总共有32个参数,我在哪里可以找到它们的规格,为什么有32个而不是另一个数量?
打印出来的信息都是关于系统的:当前工作目录(pwd)、终端(term)会话信息、用户信息等等。
2) 有没有在 main 之前入栈的东西?在典型的调用过程中,函数的参数在返回地址之前被压入堆栈(先不考虑金丝雀(canary)之类的其他内容)。当一个程序被 shell 调用时,过程是否相同,我在哪里可以读到这方面的信息?我真的很想知道 shell 是如何调用程序的,以及与程序内部的堆栈布局相比,此时的内存布局是什么样的。
#include <stdio.h>
#include <ctype.h>
/*
 * Experiment: print argc, then print the 32 pointer slots starting at
 * argv[0] as C strings, then print five characters derived from argv.
 */
int main(int argc, char * argv[]) {
void * argall = argv[0];
printf("argc=%d\n", argc);
int i = 0;
// The bound is a fixed 32, not argc: every index above argc reads past the
// end of the argv array that the C standard describes, so what is found
// there (and whether the read faults) depends on the platform.
while (i < 32) {
//while (argall) { // tried this to find out that it seg faults at i=32
// argall can be a null pointer here (argv[argc] is one); the output posted
// with this program shows the C library printing "(null)" in that case.
printf("arg%d %s\n", i, (char* ) argall);
i++;
argall = argv[i];
}
printf("negative pointers\n");
// I don't think dereferencing in this part is quite right, but I am
// getting chars since I am reading bytes. Output of below code is.
// How come it is alphabet?
// I tried reading int values and (char*) for string, but got nothing useful.
/*
arg -1 o
arg -2 n
arg -3 m
arg -4 l
arg -5 k
*/
// The cast binds tighter than the subtraction, so each argument below is
// ((char) argv) - N: the pointer value converted to char, minus N. Nothing
// is dereferenced, which is why the output is a run of consecutive letters.
printf("arg -1 %c\n", (char) argv-1);
printf("arg -2 %c\n", (char) argv-2);
printf("arg -3 %c\n", (char) argv-3);
printf("arg -4 %c\n", (char) argv-4);
printf("arg -5 %c\n", (char) argv-5);
return 0;
}
非常感谢!很抱歉帖子这么长。
更新:这是 while 循环的输出:
argc=1
arg0 ./main-testing.o
arg1 (null)
arg2 TERM_PROGRAM=iTerm.app
arg3 SHELL=/bin/bash
arg4 TERM=xterm-256color
arg5 CLICOLOR=1
arg6 TMPDIR=/var/folders/d0/<redacted>
arg7 Apple_PubSub_Socket_Render=/private/<redacted>
arg8 OLDPWD=/Users/me/problems
arg9 USER=me
arg10 COMMAND_MODE=unix2003
arg11 SSH_AUTH_SOCK=/private/t<redacted>
arg12 _<redacted>
arg13 LSCOLORS=ExFxBxDxCxegedabagacad
arg14 PATH=/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin
arg15 PWD=/Users/me/problems/c
arg16 LANG=en_CA.UTF-8
arg17 ITERM_PROFILE=Default
arg18 XPC_FLAGS=0x0
arg19 PS1=\[3[36m\]\u\[3[m\]@\[3[32m\]\h:\[3[33;1m\]\w\[3[m\]$
arg20 XPC_SERVICE_NAME=0
arg21 SHLVL=1
arg22 COLORFGBG=7;0
arg23 HOME=/Users/me
arg24 ITERM_SESSION_ID=w0t0p0
arg25 LOGNAME=me
arg26 _=./main-testing.o
arg27 (null)
arg28 executable_path=./main-testing.o
arg29
arg30
arg31
在 Linux、*BSD(因此也包括 Mac OS X)以及可能的其他类 Unix 系统上,environ
数组在堆栈上紧接着 argv
数组之后构建。
environ
是一个字符串数组,包含所有环境变量,其中每个字符串的形式为 name=value
。虽然通常通过 getenv
函数访问各个环境变量,但也允许直接使用 environ
全局变量(由 Posix 规定)。
通过遍历 main
调用帧下方的堆栈来查找这些字符串是不正确的,而且与直接使用 environ
相比也没有任何优势。
如果您想查看实际代码,则需要深入研究 execve
系统调用的实现,这才是真正启动新进程的地方。lwn.org 上有一篇关于 Linux 进程启动的讨论,其中还包含指向代码仓库的链接。FreeBSD 的实现在许多方面与之类似,位于 /sys/kern/kern_exec.c
;您可以从这里开始阅读。
您似乎在使用 Mac。在 Mac 上,main() 会收到 4 项数据。
您可以为 main()
使用替代声明:
int main(int argcv, char **argv, char **envp)
然后您将能够列出环境,就像您通过访问超出参数列表末尾所做的那样。环境跟随参数,也以空指针终止。
然后 Mac 在环境之后还有更多数据(您可以在输出中看到 executable_path=…
)。您可以在维基百科的 Entry Point 条目下找到一些相关信息,其中提到了 char *apple[]
参数向量(Argument Vector):
int main(int argc, char **argv, char **envp, char **applev)
据我所知,argv
向量之前的内容没有任何标准化规定。将它们作为单个字符访问不太可能有用。我会把这些数据按地址打印出来并寻找规律。
这是我几年前写的一些代码,试图从 environ
出发找到参数列表;它会一直有效,直到您通过添加新变量来修改环境,因为这会改变 environ
指向的位置:
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h> /* putenv(), setenv() */
extern char **environ; /* Should be declared in <unistd.h> */
/*
** The object of the exercise is: given just environ (since that is all
** that is available to a library function) attempt to find argv[0] (and
** hence argc).
**
** On some platforms, the layout of memory is such that the number of
** arguments (argc) is available, followed by the argument vector,
** followed by the environment vector.
**
** argv environ
** | |
** v v
** | argc | argv0 | argv1 | ... | argvN | 0 | env0 | env1 | ... | envN | 0 |
**
** This applies to:
** -- Solaris 10 (32-bit, 64-bit SPARC)
** -- MacOS X 10.6 (Snow Leopard, 32-bit and 64-bit)
** -- Linux (RHEL 5 on x86/64, 32-bit and 64-bit)
**
** Sadly, this is not quite what happens on the other two Unix
** platforms. The value preceding argv0 seems to be a 0.
** -- AIX 6.1 (32-bit, 64-bit)
** -- HP-UX 11.23 IA64 (32-bit, 64-bit)
** Sub-standard POSIX support (no setenv()) and C99 support (no %zd).
**
** NB: If putenv() or setenv() is called to add an environment variable,
** then the base address of environ changes radically, moving off the
** stack onto heap, and all bets are off. Modifying an existing
** variable is not a problem.
**
** Spotting the change from stack to heap is done by observing whether
** the address pointed to by environ is more than 128 K times the size
** of a pointer from the address of a local variable.
**
** This code is nominally incredibly machine-specific - but actually
** works remarkably portably.
*/
/* An argument vector together with the number of arguments it holds. */
struct Arguments
{
    char **argv;    /* first slot of the argument vector */
    size_t argc;    /* number of arguments in argv */
};
typedef struct Arguments Arguments;
/*
** Print one slot of a char * vector on stdout: the address of the slot,
** the pointer stored in it, and the string that pointer refers to
** ("<null>" when the stored pointer is null).
**
** tag - label printed in front of the index
** i   - index printed in brackets after the tag
** ptr - address of the slot to describe; it is dereferenced
*/
static void print_cpp(const char *tag, int i, char **ptr)
{
    char *stored = *ptr;
    const char *text = "<null>";

    if (stored != 0)
        text = stored;
    printf("%s[%d] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
           tag, i, (uintptr_t)ptr, (uintptr_t)stored, text);
}
/*
** Largest distance, in bytes, between environ and a local variable for
** which environ is still treated as living on the stack: 128 K pointers.
*/
enum
{
    MAX_DELTA = 128 * 1024 * sizeof(void *)
};
/*
** Try to recover the program's argument vector using only environ.
**
** When environ lies within MAX_DELTA bytes of a local variable, the slots
** below environ are scanned downwards, starting two slots below it.  The
** scan stops at the first slot whose value is zero or equals its distance
** from environ minus two - that is, a slot that looks like argc sitting
** just in front of argv[0].  Otherwise a placeholder vector
** { "<unknown>", 0 } with an argument count of 1 is reported.
**
** Every slot examined, the count found and the resulting vector are
** printed on stdout.
**
** Returns the argument vector found and the number of arguments in it.
*/
static Arguments find_argv0(void)
{
    static char *dummy[] = { "<unknown>", 0 };
    Arguments args;
    uintptr_t i;
    char **base = environ - 1;
    /* Byte distance between a local variable (&base) and environ. */
    uintptr_t delta = ((uintptr_t)&base > (uintptr_t)environ) ? (uintptr_t)&base - (uintptr_t)environ : (uintptr_t)environ - (uintptr_t)&base;

    if (delta < MAX_DELTA)
    {
        /* A slot i below environ that holds i - 2 is taken to be argc. */
        for (i = 2; (uintptr_t)(*(environ - i) + 2) != i && (uintptr_t)(*(environ - i)) != 0; i++)
            print_cpp("test", i, environ - i);
        args.argc = i - 2;
        args.argv = environ - i + 1;
    }
    else
    {
        args.argc = 1;
        args.argv = dummy;
    }

    /* args.argc is a size_t, so it is printed with %zu rather than %zd. */
    printf("argc = %zu\n", args.argc);
    /* <= so that the slot following the last argument is shown as well. */
    for (i = 0; i <= args.argc; i++)
        print_cpp("argv", i, &args.argv[i]);
    return args;
}
/*
** Print the command name and arguments as recovered by find_argv0().
**
** Slots 0 through argc inclusive are listed, so the slot following the
** last argument is shown too; a null pointer is shown as "<null>".
*/
static void print_arguments(void)
{
    Arguments args = find_argv0();

    printf("Command name and arguments\n");
    /* Both values below are size_t, hence %zu rather than %zd. */
    printf("argc = %zu\n", args.argc);
    for (size_t i = 0; i <= args.argc; i++)
    {
        printf("argv[%zu] = %s\n", i, (args.argv[i] ? args.argv[i] : "<null>"));
    }
}
/*
** Print how argv and environ relate to each other and dump the slots
** immediately below environ.
**
** argc, argv - the values received by main()
**
** Returns 0, after printing only the argv slots, when environ is more
** than argc + 1 slots away from argv (the environment vector no longer
** sits directly after the argument vector).  Returns 1 otherwise, after
** also printing the argc + 3 slots below environ.
*/
static int check_environ(int argc, char **argv)
{
    size_t n = argc;
    size_t i;
    /* Distance between the two vectors, counted in char * slots. */
    unsigned long delta = (argv > environ) ? argv - environ : environ - argv;

    printf("environ = 0x%lX; argv = 0x%lX (delta: 0x%lX)\n",
           (unsigned long)environ, (unsigned long)argv, delta);
    for (i = 0; i <= n; i++)
        print_cpp("chkv", i, &argv[i]);
    if (delta > (unsigned long)argc + 1)
        return 0;

    /* Slots environ[-1] down to environ[-(argc + 1)], shown as strings. */
    for (i = 1; i < n + 2; i++)
    {
        /* i is a size_t, hence %zu rather than %zd. */
        printf("chkr[%zu] = 0x%lX (0x%lX) (%s)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)),
               (*(environ - i) ? *(environ - i) : "<null>"));
        fflush(0);
    }

    /* The next two slots down are shown as raw values only. */
    for (i = n + 2; i <= n + 3; i++)
        printf("chkF[%zu] = 0x%lX (0x%lX)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)));
    return 1;
}
/*
** Driver: report the argv/environ layout three times - as the program
** starts, after setting SHELL with setenv() (overwrite enabled), and
** after adding CODSWALLOP with putenv().
*/
int main(int argc, char **argv)
{
    int layout_ok;

    printf("Before setting environment\n");
    layout_ok = check_environ(argc, argv);
    if (layout_ok != 0)
        print_arguments();

    setenv("SHELL", "/bin/csh", 1);
    printf("After modifying environment\n");
    layout_ok = check_environ(argc, argv);
    if (!layout_ok)
        printf("Modifying environment messed everything up\n");
    print_arguments();

    putenv("CODSWALLOP=nonsense");
    printf("After adding to environment\n");
    layout_ok = check_environ(argc, argv);
    if (!layout_ok)
        printf("Adding environment messed everything up\n");
    print_arguments();

    return 0;
}