From: Brian Warner Date: Tue, 30 Sep 2008 23:21:49 +0000 (-0700) Subject: #518: replace various BASEDIR/* config files with a single BASEDIR/tahoe.cfg, with... X-Git-Url: https://git.rkrishnan.org/pf/content/en/service/module-simplejson.tests.html?a=commitdiff_plain;h=cd26f583057dabe897a6ee0d0279973e3548e027;p=tahoe-lafs%2Ftahoe-lafs.git #518: replace various BASEDIR/* config files with a single BASEDIR/tahoe.cfg, with backwards-compatibility of course --- diff --git a/docs/configuration.txt b/docs/configuration.txt index 1775b18d..8c568bf0 100644 --- a/docs/configuration.txt +++ b/docs/configuration.txt @@ -1,7 +1,7 @@ = Configuring a Tahoe node = -A Tahoe node is configured by writing files to its base directory. These +A Tahoe node is configured by writing to files in its base directory. These files are read by the node when it starts, so each time you change them, you need to restart the node. @@ -12,138 +12,235 @@ This document contains a complete list of the config files that are examined by the client node, as well as the state files that you'll observe in its base directory. +The main file is named 'tahoe.cfg', which is an ".INI"-style configuration +file (parsed by the Python stdlib 'ConfigParser' module: "[name]" section +markers, lines with "key.subkey: value", rfc822-style continuations). There +are other files that contain information which does not easily fit into this +format. The 'tahoe create-client' command will create an initial tahoe.cfg +file for you. After creation, the node will never modify the 'tahoe.cfg' +file: all persistent state is put in other files. + +The item descriptions below use the following types: + + boolean: one of (True, yes, on, 1, False, off, no, 0), case-insensitive + strports string: a Twisted listening-port specification string, like "tcp:80" + or "tcp:8123:interface=127.0.0.1". 
For a full description of + the format, see + http://twistedmatrix.com/documents/current/api/twisted.application.strports.html + FURL string: a Foolscap endpoint identifier, like + pb://soklj4y7eok5c3xkmjeqpw@192.168.69.247:44801/eqpwqtzm + + +== Overall Node Configuration == + +This section controls the network behavior of the node overall: which ports +and IP addresses are used, when connections are timed out, etc. This +configuration is independent of the services that the node is offering: the +same controls are used for client and introducer nodes. + +[node] + +nickname = (UTF-8 string, optional) + + This value will be displayed in management tools as this node's "nickname". + If not provided, the nickname will be set to "<unspecified>". This string + shall be a UTF-8 encoded unicode string. + +web.port = (strports string, optional) + + This controls where the node's webserver should listen, providing filesystem + access and node status as defined in webapi.txt . This value contains a + Twisted "strports" specification such as "8123" or + "tcp:8123:interface=127.0.0.1". The 'tahoe create-client' command sets the + web.port to "tcp:8123:interface=127.0.0.1" by default, and is overridable by + the "--webport" option. You can make it use SSL by writing + "ssl:8123:privateKey=mykey.pem:certKey=cert.pem" instead. + + If this is not provided, the node will not run a web server. + +tub.port = (integer, optional) + + This controls which port the node uses to accept Foolscap connections from + other nodes. If not provided, the node will ask the kernel for any available + port. The port will be written to a separate file (named client.port or + introducer.port), so that subsequent runs will re-use the same port. + +advertised_ip_addresses = (comma-separated host[:port] string, optional) + + The node normally uses tools like 'ifconfig' to determine the set of IP + addresses on which it can be reached from nodes both near and far. 
The node + introduces itself to the rest of the grid with a FURL that contains a series + of (ipaddr, port) pairs which other nodes will use to contact this one. By + providing this file, you can add to this list. This can be useful if your + node is running behind a firewall, but you have created a port-forwarding to + allow the outside world to access it. Each line must have a dotted-quad IP + address and an optional :portnum specification, like: + + 123.45.67.89 + 44.55.66.77:8098 + + Lines that do not provide a port number will use the same client.port as the + automatically-discovered addresses. + +log_gatherer.furl = (FURL, optional) + + If provided, this contains a single FURL string which is used to contact a + 'log gatherer', which will be granted access to the logport. This can be + used by centralized storage meshes to gather operational logs in a single + place. Note that when an old-style BASEDIR/log_gatherer.furl file exists + (see 'Backwards Compatibility Files', below), both are used. (for most other + items, the separate config file overrides the entry in tahoe.cfg) + +timeout.keepalive = (integer in seconds, optional) +timeout.disconnect = (integer in seconds, optional) + + If timeout.keepalive is provided, it is treated as an integral number of + seconds, and sets the Foolscap "keepalive timer" to that value. For each + connection to another node, if nothing has been heard for a while, we will + attempt to provoke the other end into saying something. The duration of + silence that passes before sending the PING will be between KT and 2*KT. + This is mainly intended to keep NAT boxes from expiring idle TCP sessions, + but also gives TCP's long-duration keepalive/disconnect timers some traffic + to work with. The default value is 240 (i.e. 4 minutes). + + If timeout.disconnect is provided, this is treated as an integral number of + seconds, and sets the Foolscap "disconnect timer" to that value. 
For each + connection to another node, if nothing has been heard for a while, we will + drop the connection. The duration of silence that passes before dropping the + connection will be between DT-2*KT and 2*DT+2*KT (please see ticket #521 for + more details). If we are sending a large amount of data to the other end + (which takes more than DT-2*KT to deliver), we might incorrectly drop the + connection. The default behavior (when this value is not provided) is to + disable the disconnect timer. + + See ticket #521 for a discussion of how to pick these timeout values. Using + 30 minutes means we'll disconnect after 22 to 68 minutes of inactivity. + Receiving data will reset this timeout, however if we have more than 22min + of data in the outbound queue (such as 800kB in two pipelined segments of 10 + shares each) and the far end has no need to contact us, our ping might be + delayed, so we may disconnect them by accident. + +ssh.port = (strports string, optional) +ssh.authorized_keys_file = (filename, optional) + + This enables an SSH-based interactive Python shell, which can be used to + inspect the internal state of the node, for debugging. To cause the node to + accept SSH connections on port 8022 from the same keys as the rest of your + account, use: + + [node] + ssh.port = 8022 + ssh.authorized_keys_file = ~/.ssh/authorized_keys + == Client Configuration == -introducer.furl (mandatory): This FURL tells the client how to connect to the -introducer. Each Tahoe grid is defined by an introducer. The introducer's -furl is created by the introducer node and written into its base directory -when it starts, whereupon it should be published to everyone who wishes to -attach a client to that grid - -nickname (optional): The contents of this file will be displayed in -management tools as this node's "nickname". If the file doesn't exist, the -nickname will be set to "". This file shall be a UTF-8 encoded -unicode string. 
- -webport (optional): This controls where the client's webserver should listen, -providing filesystem access as defined in webapi.txt . This file contains a -Twisted "strports" specification (as defined in -http://twistedmatrix.com/documents/current/api/twisted.application.strports.html -) such as "8123" or "tcp:8123:interface=127.0.0.1". The 'tahoe create-client' -command sets the webport to "tcp:8123:interface=127.0.0.1" by default, and is -overridable by the "--webport" option. You can make it use SSL by writing -"ssl:8123:privateKey=mykey.pem:certKey=cert.pem" instead. - -helper.furl (optional): If present, the node will attempt to connect to and -use the given helper for uploads. See docs/helper.txt for details. - -client.port (optional): This controls which port the node listens on. If not -provided, the node will ask the kernel for any available port, and write it -to this file so that subsequent runs will re-use the same port. - -advertised_ip_addresses (optional): The node normally uses tools like -'ifconfig' to determine the set of IP addresses on which it can be reached -from nodes both near and far. The node introduces itself to the rest of the -grid with a FURL that contains a series of (ipaddr, port) pairs which other -nodes will use to contact this one. By providing this file, you can add to -this list. This can be useful if your node is running behind a firewall, but -you have created a port-forwarding to allow the outside world to access it. -Each line must have a dotted-quad IP address and an optional :portnum -specification, like: - - 123.45.67.89 - 44.55.66.77:8098 - -Lines that do not provide a port number will use the same client.port as the -automatically-discovered addresses. - -keepalive_timeout (optional): If present, this is treated as an integral -number of seconds, and sets the Foolscap "keepalive timer" to that value. 
For -each connection to another node, if nothing has been heard for a while, we -will attempt to provoke the other end into saying something. The duration of -silence that passes before sending the PING will be between KT and 2*KT. This -is mainly intended to keep NAT boxes from expiring idle TCP sessions, but -also gives TCP's long-duration keepalive/disconnect timers some traffic to -work with. The default value is 240 (i.e. 4 minutes). - -disconnect_timeout (optional): If present, this is treated as an integral -number of seconds, and sets the Foolscap "disconnect timer" to that value. -For each connection to another node, if nothing has been heard for a while, -we will drop the connection. The duration of silence that passes before -dropping the connection will be between DT-2*KT and 2*DT+2*KT (please see -ticket #521 for more details). If we are sending a large amount of data to -the other end (which takes more than DT-2*KT to deliver), we might -incorrectly drop the connection. The default behavior (when this file does -not exist) is to disable the disconnect timer. - -authorized_keys.SSHPORT (optional): This enables an SSH-based interactive -Python shell, which can be used to inspect the internal state of the node, -for debugging. To cause the node to accept SSH connections on port 8022, -symlink "authorized_keys.8022" to your ~/.ssh/authorized_keys file, and it -will accept the same keys as the rest of your account. - -no_storage (optional): If this file is present (the contents do not matter), -the node will not run a storage server, meaning that no shares will be stored -on this node. Use this for clients who do not wish to provide storage +[client] +introducer.furl = (FURL string, mandatory) + + This FURL tells the client how to connect to the introducer. Each Tahoe grid + is defined by an introducer. 
The introducer's furl is created by the + introducer node and written into its base directory when it starts, + whereupon it should be published to everyone who wishes to attach a client + to that grid. + +helper.furl = (FURL string, optional) + + If provided, the node will attempt to connect to and use the given helper + for uploads. See docs/helper.txt for details. + +key_generator.furl = (FURL string, optional) + + If provided, the node will attempt to connect to and use the given + key-generator service, using RSA keys from the external process rather than + generating its own. + +stats_gatherer.furl = (FURL string, optional) + + If provided, the node will connect to the given stats gatherer and provide + it with operational statistics. + + +== Storage Server Configuration == + +[storage] +enabled = (boolean, optional) + + If this is True, the node will run a storage server, offering space to other + clients. If it is False, the node will not run a storage server, meaning + that no shares will be stored on this node. Use False for clients who + do not wish to provide storage service. The default value is True. + +readonly = (boolean, optional) + + If True, the node will run a storage server but will not accept any shares, + making it effectively read-only. Use this for storage servers which are + being decommissioned: the storage/ directory could be mounted read-only, + while shares are moved to other servers. Note that this currently only + affects immutable shares. Mutable shares (used for directories) will be + written and modified anyway. See ticket #390 for the current status of this + bug. The default value is False. + +sizelimit = (str, optional) + + If provided, this value establishes an upper bound (in bytes) on the amount + of storage consumed by share data (data that your node holds on behalf of + clients that are uploading files to the grid). To avoid providing more than + 100MB of data to other clients, set this key to "100MB". 
Note that this is a + fairly loose bound, and the node may occasionally use slightly more storage + than this. To enforce a stronger (and possibly more reliable) limit, use a + symlink to place the 'storage/' directory on a separate size-limited + filesystem, and/or use per-user OS/filesystem quotas. If a size limit is + specified then Tahoe will do a "du" at startup (traversing all the storage + and summing the sizes of the files), which can take a long time if there are + a lot of shares stored. + + This string contains a number, with an optional case-insensitive scale + suffix like "K" or "M" or "G", and an optional "B" suffix. So "100MB", + "100M", "100000000B", "100000000", and "100000kb" all mean the same thing. + + +== Running A Helper == + +A "helper" is a regular client node that also offers the "upload helper" service. -readonly_storage (optional): If this file is present (the contents do not -matter), the node will run a storage server but will not accept any shares, -making it effectively read-only. Use this for storage servers which are being -decommissioned: the storage/ directory could be mounted read-only, while -shares are moved to other servers. Note that this currently only affects -immutable shares. Mutable shares (used for directories) will be written and -modified anyway. See ticket #390 for the current status of this bug. - -sizelimit (optional): If present, this file establishes an upper bound (in -bytes) on the amount of storage consumed by share data (data that your node -holds on behalf of clients that are uploading files to the grid). To avoid -providing more than 100MB of data to other clients, write "100000000" into -this file. Note that this is a fairly loose bound, and the node may -occasionally use slightly more storage than this. To enforce a stronger (and -possibly more reliable) limit, use a symlink to place the 'storage/' -directory on a separate size-limited filesystem, and/or use per-user -OS/filesystem quotas. 
If a size limit is specified then Tahoe will do a "du" -at startup (traversing all the storage and summing the sizes of the files), -which can take a long time if there are a lot of shares stored. +[helper] +enabled = (boolean, optional) -private/root_dir.cap (optional): The command-line tools will read a directory -cap out of this file and use it, if you don't specify a '--dir-cap' option or -if you specify '--dir-cap=root'. + If True, the node will run a helper (see docs/helper.txt for details). The + helper's contact FURL will be placed in private/helper.furl, from which it + can be copied to any clients which wish to use it. Clearly nodes should not + both run a helper and attempt to use one: do not create both helper.furl and + run_helper in the same node. The default is False. -private/convergence (automatically generated): An added secret for encrypting -immutable files. Everyone who has this same string in their private/convergence -file encrypts their immutable files in the same way when uploading them. This -causes identical files to "converge" -- to share the same storage space since -they have identical ciphertext -- which conserves space and optimizes upload -time, but it also exposes files to the possibility of a brute-force attack by -people who know that string. In this attack, if the attacker can guess most of -the contents of a file, then they can use brute-force to learn the remaining -contents. -So the set of people who know your private/convergence string is the set of -people who converge their storage space with you when you and they upload -identical immutable files, and it is also the set of people who could mount such -an attack. +== Running An Introducer == + +The introducer node uses a different '.tac' file (named introducer.tac), and +pays attention to the "[node]" section, but not the others. + +The Introducer node maintains some different state than regular client +nodes. 
+ +BASEDIR/introducer.furl : This is generated the first time the introducer +node is started, and used again on subsequent runs, to give the introduction +service a persistent long-term identity. This file should be published and +copied into new client nodes before they are started for the first time. -The content of the private/convergence file is a base-32 encoded string. If the -file doesn't exist, then when the Tahoe client starts up it will generate a -random 256-bit string and write the base-32 encoding of this string into the -file. If you want to converge your immutable files with as many people as -possible, put the empty string (so that private/convergence is a zero-length -file). -log_gatherer.furl : if present, this file is used to contact a 'log -gatherer', which will be granted access to the logport. This can be used by -centralized storage meshes to gather operational logs in a single place. +== Other Files in BASEDIR == -run_helper : if present and not empty, the node will run a helper (see -docs/helper.txt for details). The helper's contact FURL will be placed in -private/helper.furl, from which it can be copied to any clients which wish to -use it. Clearly nodes should not both run a helper and attempt to use one: do -not create both helper.furl and run_helper in the same node. +Some configuration is not kept in tahoe.cfg, for the following reasons: + + * it is generated by the node at startup, e.g. encryption keys. The node + never writes to tahoe.cfg + * it is generated by user action, e.g. the 'tahoe create-alias' command + +In addition, non-configuration persistent state is kept in the node's base +directory, next to the configuration knobs. + +This section describes these other files. -== Node State == private/node.pem : This contains an SSL private-key certificate. 
The node generates this the first time it is started, and re-uses it on subsequent @@ -179,29 +276,32 @@ private/helper.furl : if the node is running a helper (for use by other clients), its contact FURL will be placed here. See docs/helper.txt for more details. -== Introducer configuration == - -Introducer nodes use the same 'advertised_ip_addresses' file as client -nodes. They also use 'authorized_keys.SSHPORT'. - -There are no additional configuration parameters for the introducer. - - -== Introducer state == +private/root_dir.cap (optional): The command-line tools will read a directory +cap out of this file and use it, if you don't specify a '--dir-cap' option or +if you specify '--dir-cap=root'. -The Introducer node maintains some different state than regular client -nodes. +private/convergence (automatically generated): An added secret for encrypting +immutable files. Everyone who has this same string in their +private/convergence file encrypts their immutable files in the same way when +uploading them. This causes identical files to "converge" -- to share the +same storage space since they have identical ciphertext -- which conserves +space and optimizes upload time, but it also exposes files to the possibility +of a brute-force attack by people who know that string. In this attack, if +the attacker can guess most of the contents of a file, then they can use +brute-force to learn the remaining contents. -introducer.furl : This is generated the first time the introducer node is -started, and used again on subsequent runs, to give the introduction service -a persistent long-term identity. This file should be published and copied -into new client nodes before they are started for the first time. +So the set of people who know your private/convergence string is the set of +people who converge their storage space with you when you and they upload +identical immutable files, and it is also the set of people who could mount +such an attack. 
-introducer.port : this serves exactly the same purpose as 'client.port', but -has a different name to make it clear what kind of node is being run. +The content of the private/convergence file is a base-32 encoded string. If +the file doesn't exist, then when the Tahoe client starts up it will generate +a random 256-bit string and write the base-32 encoding of this string into +the file. If you want to converge your immutable files with as many people as +possible, put the empty string (so that private/convergence is a zero-length +file). -introducer.tac : this file is like client.tac but defines an -introducer node instead of a client node. == Other files == @@ -220,3 +320,66 @@ node. This NodeID is the same string that gets displayed on the web page (in the "which peers am I connected to" list), and the shortened form (the first characters) is recorded in various log messages. + +== Backwards Compatibility Files == + +Tahoe releases before 1.3.0 had no 'tahoe.cfg' file, and used distinct files +for each item listed below. For each configuration knob, if the distinct file +exists, it will take precedence over the corresponding item in tahoe.cfg . 
+ + +[node]nickname : BASEDIR/nickname +[node]web.port : BASEDIR/webport +[node]tub.port : BASEDIR/client.port (for Clients, not Introducers) +[node]tub.port : BASEDIR/introducer.port (for Introducers, not Clients) + (note that, unlike other keys, tahoe.cfg overrides the *.port file) +[node]advertised_ip_addresses : BASEDIR/advertised_ip_addresses (one per line) +[node]log_gatherer.furl : BASEDIR/log_gatherer.furl (one per line) +[node]timeout.keepalive : BASEDIR/keepalive_timeout +[node]timeout.disconnect : BASEDIR/disconnect_timeout +[node]ssh.port : BASEDIR/authorized_keys.SSHPORT +[node]ssh.authorized_keys_file : BASEDIR/authorized_keys.SSHPORT +[client]introducer.furl : BASEDIR/introducer.furl +[client]helper.furl : BASEDIR/helper.furl +[client]key_generator.furl : BASEDIR/key_generator.furl +[client]stats_gatherer.furl : BASEDIR/stats_gatherer.furl +[storage]enabled : BASEDIR/no_storage (False if no_storage exists) +[storage]readonly : BASEDIR/readonly_storage (True if readonly_storage exists) +[storage]sizelimit : BASEDIR/sizelimit +[storage]debug_discard : BASEDIR/debug_discard_storage +[helper]enabled : BASEDIR/run_helper (True if run_helper exists) + +Note: the functionality of [node]ssh.port and [node]ssh.authorized_keys_file +were previously combined, controlled by the presence of a +BASEDIR/authorized_keys.SSHPORT file, in which the suffix of the filename +indicated which port the ssh server should listen on. + + +== Example == + +The following is a sample tahoe.cfg file, containing values for all keys +described above. Note that this is not a recommended configuration (most of +these are not the default values), merely a legal one. 
+ +[node] +nickname = Bob's Tahoe Node +web.port = 8123 +tub.port = 34912 +advertised_ip_addresses = 123.45.67.89,44.55.66.77:8098 +log_gatherer.furl = pb://soklj4y7eok5c3xkmjeqpw@192.168.69.247:44801/eqpwqtzm +timeout.keepalive = 240 +timeout.disconnect = 1800 +ssh.port = 8022 +ssh.authorized_keys_file = ~/.ssh/authorized_keys + +[client] +introducer.furl = pb://ok45ssoklj4y7eok5c3xkmj@tahoe.example:44801/ii3uumo +helper.furl = pb://ggti5ssoklj4y7eok5c3xkmj@helper.tahoe.example:7054/kk8lhr + +[storage] +enabled = True +readonly = True +sizelimit = 10000000000 + +[helper] +enabled = True diff --git a/src/allmydata/client.py b/src/allmydata/client.py index cb800a81..42c6a9c8 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -65,22 +65,16 @@ class Client(node.Node, testutil.PollMixin): node.Node.__init__(self, basedir) self.started_timestamp = time.time() self.logSource="Client" - nickname_utf8 = self.get_config("nickname") - if nickname_utf8: - self.nickname = nickname_utf8.decode("utf-8") - else: - self.nickname = u"" self.init_introducer_client() self.init_stats_provider() self.init_lease_secret() self.init_storage() self.init_control() - run_helper = self.get_config("run_helper") - if run_helper: + if self.get_config("helper", "enabled", False, boolean=True): self.init_helper() self.init_client() self._key_generator = None - key_gen_furl = self.get_config('key_generator.furl') + key_gen_furl = self.get_config("client", "key_generator.furl", None) if key_gen_furl: self.init_key_gen(key_gen_furl) # ControlServer and Helper are attached after Tub startup @@ -93,12 +87,29 @@ class Client(node.Node, testutil.PollMixin): hotline = TimerService(1.0, self._check_hotline, hotline_file) hotline.setServiceParent(self) - webport = self.get_config("webport") + webport = self.get_config("node", "web.port", None) if webport: self.init_web(webport) # strports string + def read_old_config_files(self): + node.Node.read_old_config_files(self) + copy = 
self._copy_config_from_file + copy("introducer.furl", "client", "introducer.furl") + copy("helper.furl", "client", "helper.furl") + copy("key_generator.furl", "client", "key_generator.furl") + copy("stats_gatherer.furl", "client", "stats_gatherer.furl") + if os.path.exists(os.path.join(self.basedir, "no_storage")): + self.set_config("storage", "enabled", "false") + if os.path.exists(os.path.join(self.basedir, "readonly_storage")): + self.set_config("storage", "readonly", "true") + copy("sizelimit", "storage", "sizelimit") + if os.path.exists(os.path.join(self.basedir, "debug_discard_storage")): + self.set_config("storage", "debug_discard", "true") + if os.path.exists(os.path.join(self.basedir, "run_helper")): + self.set_config("helper", "enabled", "true") + def init_introducer_client(self): - self.introducer_furl = self.get_config("introducer.furl", required=True) + self.introducer_furl = self.get_config("client", "introducer.furl") ic = IntroducerClient(self.tub, self.introducer_furl, self.nickname, str(allmydata.__version__), @@ -117,7 +128,7 @@ class Client(node.Node, testutil.PollMixin): level=log.BAD, umid="URyI5w") def init_stats_provider(self): - gatherer_furl = self.get_config('stats_gatherer.furl') + gatherer_furl = self.get_config("client", "stats_gatherer.furl", None) self.stats_provider = StatsProvider(self, gatherer_furl) self.add_service(self.stats_provider) self.stats_provider.register_producer(self) @@ -131,15 +142,14 @@ class Client(node.Node, testutil.PollMixin): def init_storage(self): # should we run a storage server (and publish it for others to use)? 
- provide_storage = (self.get_config("no_storage") is None) - if not provide_storage: + if not self.get_config("storage", "enabled", True, boolean=True): return - readonly_storage = (self.get_config("readonly_storage") is not None) + readonly = self.get_config("storage", "readonly", False, boolean=True) storedir = os.path.join(self.basedir, self.STOREDIR) - sizelimit = None - data = self.get_config("sizelimit") + sizelimit = None + data = self.get_config("storage", "sizelimit", None) if data: m = re.match(r"^(\d+)([kKmMgG]?[bB]?)$", data) if not m: @@ -155,9 +165,9 @@ class Client(node.Node, testutil.PollMixin): "G": 1000 * 1000 * 1000, }[suffix] sizelimit = int(number) * multiplier - discard_storage = self.get_config("debug_discard_storage") is not None - ss = StorageServer(storedir, sizelimit, - discard_storage, readonly_storage, + discard = self.get_config("storage", "debug_discard", False, + boolean=True) + ss = StorageServer(storedir, sizelimit, discard, readonly, self.stats_provider) self.add_service(ss) d = self.when_tub_ready() @@ -172,7 +182,7 @@ class Client(node.Node, testutil.PollMixin): level=log.BAD, umid="aLGBKw") def init_client(self): - helper_furl = self.get_config("helper.furl") + helper_furl = self.get_config("client", "helper.furl", None) convergence_s = self.get_or_create_private_config('convergence', _make_secret) self.convergence = base32.a2b(convergence_s) self._node_cache = weakref.WeakValueDictionary() # uri -> node @@ -241,8 +251,6 @@ class Client(node.Node, testutil.PollMixin): from allmydata.webish import WebishServer nodeurl_path = os.path.join(self.basedir, "node.url") ws = WebishServer(webport, nodeurl_path) - if self.get_config("webport_allow_localfile") is not None: - ws.allow_local_access(True) self.add_service(ws) def _check_hotline(self, hotline_file): diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py index b0b9b652..c71237a4 100644 --- a/src/allmydata/introducer/server.py +++ 
b/src/allmydata/introducer/server.py @@ -15,8 +15,9 @@ class IntroducerNode(node.Node): def __init__(self, basedir="."): node.Node.__init__(self, basedir) + self.read_config() self.init_introducer() - webport = self.get_config("webport") + webport = self.get_config("node", "web.port", None) if webport: self.init_web(webport) # strports string diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 310be291..fd5b526c 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -1,5 +1,5 @@ -import datetime, os.path, re, types +import datetime, os.path, re, types, ConfigParser from base64 import b32decode, b32encode from twisted.python import log as twlog @@ -38,6 +38,12 @@ such as private keys. On Unix-like systems, the permissions on this directory are set to disallow users other than its owner from reading the contents of the files. See the 'configuration.txt' documentation file for details.""" +class _None: # used as a marker in get_config() + pass + +class MissingConfigEntry(Exception): + pass + class Node(service.MultiService): # this implements common functionality of both Client nodes and Introducer # nodes. @@ -49,25 +55,110 @@ class Node(service.MultiService): def __init__(self, basedir="."): service.MultiService.__init__(self) self.basedir = os.path.abspath(basedir) + self._portnumfile = os.path.join(self.basedir, self.PORTNUMFILE) self._tub_ready_observerlist = observer.OneShotObserverList() fileutil.make_dirs(os.path.join(self.basedir, "private"), 0700) open(os.path.join(self.basedir, "private", "README"), "w").write(PRIV_README) + + # creates self.config, populates from distinct files if necessary + self.read_config() + + nickname_utf8 = self.get_config("node", "nickname", "") + self.nickname = nickname_utf8.decode("utf-8") + + self.create_tub() + self.logSource="Node" + + self.setup_ssh() + self.setup_logging() + self.log("Node constructed. 
" + get_package_versions_string()) + iputil.increase_rlimits() + + def get_config(self, section, option, default=_None, boolean=False): + try: + if boolean: + return self.config.getboolean(section, option) + return self.config.get(section, option) + except (ConfigParser.NoOptionError, ConfigParser.NoSectionError): + if default is _None: + fn = os.path.join(self.basedir, "tahoe.cfg") + raise MissingConfigEntry("%s is missing the [%s]%s entry" + % (fn, section, option)) + return default + + def set_config(self, section, option, value): + if not self.config.has_section(section): + self.config.add_section(section) + self.config.set(section, option, value) + assert self.config.get(section, option) == value + + def read_config(self): + self.config = ConfigParser.SafeConfigParser() + self.config.read([os.path.join(self.basedir, "tahoe.cfg")]) + self.read_old_config_files() + + def read_old_config_files(self): + # backwards-compatibility: individual files will override the + # contents of tahoe.cfg + copy = self._copy_config_from_file + + copy("nickname", "node", "nickname") + copy("webport", "node", "web.port") + + cfg_tubport = self.get_config("node", "tub.port", "") + if not cfg_tubport: + # For 'tub.port', tahoe.cfg overrides the individual file on + # disk. So only read self._portnumfile is tahoe.cfg doesn't + # provide a value. 
+ try: + file_tubport = open(self._portnumfile, "rU").read().strip() + self.set_config("node", "tub.port", file_tubport) + except EnvironmentError: + pass + + try: + addresses = [] + ipfile = os.path.join(self.basedir, self.LOCAL_IP_FILE) + tubport = int(self.get_config("node", "tub.port", "0")) + for addrline in open(ipfile, "rU"): + mo = ADDR_RE.search(addrline) + if mo: + (addr, dummy, aportnum,) = mo.groups() + if aportnum is None: + aportnum = tubport + addresses.append("%s:%d" % (addr, int(aportnum),)) + self.set_config("node", "advertised_ip_addresses", + ",".join(addresses)) + except EnvironmentError: + pass + copy("keepalive_timeout", "node", "timeout.keepalive") + copy("disconnect_timeout", "node", "timeout.disconnect") + AUTHKEYSFILEBASE = "authorized_keys." + for f in os.listdir(self.basedir): + if f.startswith(AUTHKEYSFILEBASE): + keyfile = os.path.join(self.basedir, f) + portnum = int(f[len(AUTHKEYSFILEBASE):]) + self.set_config("node", "ssh.port", str(portnum)) + self.set_config("node", "ssh.authorized_keys_file", keyfile) + # only allow one + break + + def _copy_config_from_file(self, config_filename, section, keyname): + s = self.get_config_from_file(config_filename) + if s is not None: + self.set_config(section, keyname, s) + + def create_tub(self): certfile = os.path.join(self.basedir, "private", self.CERTFILE) self.tub = Tub(certFile=certfile) self.tub.setOption("logLocalFailures", True) self.tub.setOption("logRemoteFailures", True) - # see #521 for a discussion of how to pick these timeout values. Using - # 30 minutes means we'll disconnect after 22 to 68 minutes of - # inactivity. Receiving data will reset this timeout, however if we - # have more than 22min of data in the outbound queue (such as 800kB - # in two pipelined segments of 10 shares each) and the far end has no - # need to contact us, our ping might be delayed, so we may disconnect - # them by accident. 
- keepalive_timeout_s = self.get_config("keepalive_timeout") + # see #521 for a discussion of how to pick these timeout values. + keepalive_timeout_s = self.get_config("node", "timeout.keepalive", "") if keepalive_timeout_s: self.tub.setOption("keepaliveTimeout", int(keepalive_timeout_s)) - disconnect_timeout_s = self.get_config("disconnect_timeout") + disconnect_timeout_s = self.get_config("node", "timeout.disconnect", "") if disconnect_timeout_s: # N.B.: this is in seconds, so use "1800" to get 30min self.tub.setOption("disconnectTimeout", int(disconnect_timeout_s)) @@ -75,42 +166,28 @@ class Node(service.MultiService): self.nodeid = b32decode(self.tub.tubID.upper()) # binary format self.write_config("my_nodeid", b32encode(self.nodeid).lower() + "\n") self.short_nodeid = b32encode(self.nodeid).lower()[:8] # ready for printing - assert self.PORTNUMFILE, "Your node.Node subclass must provide PORTNUMFILE" - self._portnumfile = os.path.join(self.basedir, self.PORTNUMFILE) - try: - portnum = int(open(self._portnumfile, "rU").read()) - except (EnvironmentError, ValueError): - portnum = 0 - self.tub.listenOn("tcp:%d" % portnum) + + tubport = self.get_config("node", "tub.port", "tcp:0") + self.tub.listenOn(tubport) # we must wait until our service has started before we can find out # our IP address and thus do tub.setLocation, and we can't register # any services with the Tub until after that point self.tub.setServiceParent(self) - self.logSource="Node" - AUTHKEYSFILEBASE = "authorized_keys." 
- for f in os.listdir(self.basedir): - if f.startswith(AUTHKEYSFILEBASE): - keyfile = os.path.join(self.basedir, f) - try: - portnum = int(f[len(AUTHKEYSFILEBASE):]) - except ValueError: - self.log("AuthorizedKeysManhole malformed file name %s" % (f,)) - else: - from allmydata import manhole - m = manhole.AuthorizedKeysManhole(portnum, keyfile) - m.setServiceParent(self) - self.log("AuthorizedKeysManhole listening on %d" % portnum) - - self.setup_logging() - self.log("Node constructed. " + get_package_versions_string()) - iputil.increase_rlimits() + def setup_ssh(self): + ssh_port = self.get_config("node", "ssh.port", "") + if ssh_port: + ssh_keyfile = self.get_config("node", "ssh.authorized_keys_file") + from allmydata import manhole + m = manhole.AuthorizedKeysManhole(ssh_port, ssh_keyfile) + m.setServiceParent(self) + self.log("AuthorizedKeysManhole listening on %s" % ssh_port) def get_app_versions(self): # TODO: merge this with allmydata.get_package_versions return dict(app_versions.versions) - def get_config(self, name, required=False): + def get_config_from_file(self, name, required=False): """Get the (string) contents of a config file, or None if the file did not exist. If required=True, raise an exception rather than returning None. Any leading or trailing whitespace will be stripped @@ -144,7 +221,7 @@ class Node(service.MultiService): which is expected to return a string. 
""" privname = os.path.join("private", name) - value = self.get_config(privname) + value = self.get_config_from_file(privname) if value is None: if isinstance(default, (str, unicode)): value = default @@ -233,6 +310,10 @@ class Node(service.MultiService): self.tub.setOption("logport-furlfile", os.path.join(self.basedir, "private","logport.furl")) + lgfurl = self.get_config("node", "log_gatherer.furl", "") + if lgfurl: + # this is in addition to the contents of log-gatherer-furlfile + self.tub.setOption("log-gatherer-furl", lgfurl) self.tub.setOption("log-gatherer-furlfile", os.path.join(self.basedir, "log_gatherer.furl")) self.tub.setOption("bridge-twisted-logs", True) @@ -265,21 +346,11 @@ class Node(service.MultiService): # record which port we're listening on, so we can grab the same one next time open(self._portnumfile, "w").write("%d\n" % portnum) - local_addresses = [ "%s:%d" % (addr, portnum,) for addr in local_addresses ] - - addresses = [] - try: - for addrline in open(os.path.join(self.basedir, self.LOCAL_IP_FILE), "rU"): - mo = ADDR_RE.search(addrline) - if mo: - (addr, dummy, aportnum,) = mo.groups() - if aportnum is None: - aportnum = portnum - addresses.append("%s:%d" % (addr, int(aportnum),)) - except EnvironmentError: - pass - - addresses.extend(local_addresses) + addresses = [ "%s:%d" % (addr, portnum,) for addr in local_addresses ] + extra_addresses = self.get_config("node", "advertised_ip_addresses", "") + if extra_addresses: + extra_addresses = extra_addresses.split(",") + addresses.extend(extra_addresses) location = ",".join(addresses) self.log("Tub location set to %s" % location) diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index b252dca4..84259d27 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -5,9 +5,17 @@ from allmydata.scripts.common import BasedirMixin, NoDefaultBasedirMixin class CreateClientOptions(BasedirMixin, usage.Options): optParameters = [ 
- ["basedir", "C", None, "which directory to create the client in"], - ["webport", "p", "tcp:8123:interface=127.0.0.1", - "which TCP port to run the HTTP interface on. Use 'none' to disable."], + ("basedir", "C", None, "which directory to create the client in"), + # we provide create-client-time options for the most common + # configuration knobs. The rest can be controlled by editing + # tahoe.cfg before node startup. + ("nickname", "n", "", "nickname for this node"), + ("introducer", "i", "", "introducer FURL to use"), + ("webport", "p", "tcp:8123:interface=127.0.0.1", + "which TCP port to run the HTTP interface on. Use 'none' to disable."), + ] + optFlags = [ + ("no-storage", None, "do not offer storage service to other nodes"), ] class CreateIntroducerOptions(NoDefaultBasedirMixin, usage.Options): @@ -39,6 +47,33 @@ application = service.Application("allmydata_introducer") c.setServiceParent(application) """ +def write_node_config(c, config): + # this is shared between clients and introducers + c.write("# -*- mode: conf; coding: utf-8 -*-\n") + c.write("\n") + c.write("# This file controls the configuration of the Tahoe node that\n") + c.write("# lives in this directory. It is only read at node startup.\n") + c.write("# For details about the keys that can be set here, please\n") + c.write("# read the 'docs/configuration.txt' file that came with your\n") + c.write("# Tahoe installation.\n") + c.write("\n\n") + + c.write("[node]\n") + c.write("nickname = %s\n" % config.get("nickname", "")) #TODO: utf8 in argv? 
+ webport = config.get("webport", "none") + if webport.lower() == "none": + webport = "" + c.write("web.port = %s\n" % webport) + c.write("#tub.port =\n") + c.write("#advertised_ip_addresses =\n") + c.write("#log_gatherer.furl =\n") + c.write("#timeout.keepalive =\n") + c.write("#timeout.disconnect =\n") + c.write("#ssh.port = 8022\n") + c.write("#ssh.authorized_keys_file = ~/.ssh/authorized_keys\n") + c.write("\n") + + def create_client(basedir, config, out=sys.stdout, err=sys.stderr): if os.path.exists(basedir): if os.listdir(basedir): @@ -52,14 +87,40 @@ def create_client(basedir, config, out=sys.stdout, err=sys.stderr): f = open(os.path.join(basedir, "tahoe-client.tac"), "w") f.write(client_tac) f.close() - if config.get('webport', "none").lower() != "none": - f = open(os.path.join(basedir, "webport"), "w") - f.write(config['webport'] + "\n") - f.close() + + c = open(os.path.join(basedir, "tahoe.cfg"), "w") + + write_node_config(c, config) + + c.write("[client]\n") + c.write("introducer.furl = %s\n" % config.get("introducer", "")) + c.write("helper.furl =\n") + c.write("#key_generator.furl =\n") + c.write("#stats_gatherer.furl =\n") + c.write("\n") + + boolstr = {True:"true", False:"false"} + c.write("[storage]\n") + storage_enabled = not config.get("no-storage", None) + c.write("enabled = %s\n" % boolstr[storage_enabled]) + c.write("#readonly =\n") + c.write("#sizelimit =\n") + c.write("\n") + + c.write("[helper]\n") + c.write("enabled = false\n") + c.write("\n") + + c.close() + from allmydata.util import fileutil fileutil.make_dirs(os.path.join(basedir, "private"), 0700) print >>out, "client created in %s" % basedir - print >>out, " please copy introducer.furl into the directory" + if not config.get("introducer", ""): + print >>out, " Please set [client]introducer.furl= in tahoe.cfg!" + print >>out, " The node cannot connect to a grid without it." 
+ if not config.get("nickname", ""): + print >>out, " Please set [node]nickname= in tahoe.cfg" def create_introducer(basedir, config, out=sys.stdout, err=sys.stderr): if os.path.exists(basedir): @@ -74,6 +135,11 @@ def create_introducer(basedir, config, out=sys.stdout, err=sys.stderr): f = open(os.path.join(basedir, "tahoe-introducer.tac"), "w") f.write(introducer_tac) f.close() + + c = open(os.path.join(basedir, "tahoe.cfg"), "w") + write_node_config(c, config) + c.close() + print >>out, "introducer created in %s" % basedir subCommands = [ diff --git a/src/allmydata/stats.py b/src/allmydata/stats.py index 96621311..0b4af67f 100644 --- a/src/allmydata/stats.py +++ b/src/allmydata/stats.py @@ -153,9 +153,9 @@ class StatsProvider(foolscap.Referenceable, service.MultiService): if self.node and self.gatherer_furl: d = self.node.when_tub_ready() def connect(junk): - nickname = self.node.get_config('nickname') + nickname_utf8 = self.node.nickname.encode("utf-8") self.node.tub.connectTo(self.gatherer_furl, - self._connected, nickname) + self._connected, nickname_utf8) d.addCallback(connect) service.MultiService.startService(self)